poLCA_ejemplos.R

library(poLCA)

## Loading required package: scatterplot3d

## Loading required package: MASS

library(foreign)


datos <- read.spss("C:/Users/carlos/Downloads/DATOS LM/md3057/datos3057.sav",
                   to.data.frame = TRUE)

## Warning in read.spss("C:/Users/carlos/Downloads/DATOS LM/md3057/
## datos3057.sav", : C:/Users/carlos/Downloads/DATOS LM/md3057/datos3057.sav:
## Unrecognized record type 7, subtype 18 encountered in system file

## Warning in `levels<-`(`*tmp*`, value = if (nl == nL) as.character(labels)
## else paste0(labels, : duplicated levels in factors are deprecated

sapply(datos[,52:58], FUN= levels)

##      P1501                P1502                P1503               
## [1,] "Muy necesario"      "Muy necesario"      "Muy necesario"     
## [2,] "Bastante necesario" "Bastante necesario" "Bastante necesario"
## [3,] "Poco necesario"     "Poco necesario"     "Poco necesario"    
## [4,] "Nada necesario"     "Nada necesario"     "Nada necesario"    
## [5,] "No sabe lo que es"  "No sabe lo que es"  "No sabe lo que es" 
## [6,] "N.S."               "N.S."               "N.S."              
## [7,] "N.C."               "N.C."               "N.C."              
##      P1504                P1505                P1506               
## [1,] "Muy necesario"      "Muy necesario"      "Muy necesario"     
## [2,] "Bastante necesario" "Bastante necesario" "Bastante necesario"
## [3,] "Poco necesario"     "Poco necesario"     "Poco necesario"    
## [4,] "Nada necesario"     "Nada necesario"     "Nada necesario"    
## [5,] "No sabe lo que es"  "No sabe lo que es"  "No sabe lo que es" 
## [6,] "N.S."               "N.S."               "N.S."              
## [7,] "N.C."               "N.C."               "N.C."              
##      P1507               
## [1,] "Muy necesario"     
## [2,] "Bastante necesario"
## [3,] "Poco necesario"    
## [4,] "Nada necesario"    
## [5,] "No sabe lo que es" 
## [6,] "N.S."              
## [7,] "N.C."

str(datos)

## 'data.frame':    2476 obs. of  261 variables:
##  $ ESTU       : num  3057 3057 3057 3057 3057 ...
##  $ CUES       : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ CCAA       : Factor w/ 19 levels "Andalucía","Aragón",..: 16 16 16 16 16 16 16 16 7 7 ...
##  $ PROV       : Factor w/ 52 levels "Araba-Álava",..: 1 1 1 1 1 1 1 1 2 2 ...
##  $ MUN        : Factor w/ 64 levels "Mun.<100.000 hab.",..: 31 31 31 31 31 31 31 31 1 1 ...
##  $ TAMUNI     : Factor w/ 7 levels "Menos o igual a 2.000 habitantes",..: 5 5 5 5 5 5 5 5 2 2 ...
##  $ AREA       : Factor w/ 2 levels "No pertenece",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DISTR      : Factor w/ 1 level "Anonimizado": 1 1 1 1 1 1 1 1 1 1 ...
##  $ SECCION    : Factor w/ 1 level "Anonimizado": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ENTREV     : Factor w/ 1 level "Anonimizado": 1 1 1 1 1 1 1 1 1 1 ...
##  $ P0         : Factor w/ 3 levels "Española","Española y otra",..: 1 1 1 1 2 1 1 1 1 1 ...
##  $ P0A        : Factor w/ 199 levels "Albanesa","Austriaca",..: NA NA NA NA 138 NA NA NA NA NA ...
##  $ P1         : Factor w/ 7 levels "Muy buena","Buena",..: 3 4 4 4 3 4 4 4 3 4 ...
##  $ P2         : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 2 ...
##  $ P3         : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 1 ...
##  $ P4         : Factor w/ 7 levels "Muy buena","Buena",..: 6 5 5 5 4 5 4 5 6 6 ...
##  $ P5         : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 4 ...
##  $ P6         : Factor w/ 5 levels "Mejor","Igual",..: 2 2 2 2 2 2 2 2 1 4 ...
##  $ P701       : Factor w/ 47 levels "El paro","Las drogas",..: 1 1 13 11 1 8 1 11 1 1 ...
##  $ P702       : Factor w/ 47 levels "El paro","Las drogas",..: 8 8 8 1 8 11 11 1 16 47 ...
##  $ P703       : Factor w/ 47 levels "El paro","Las drogas",..: 47 11 6 47 13 16 16 8 47 47 ...
##  $ P801       : Factor w/ 47 levels "El paro","Las drogas",..: 1 1 13 11 1 8 1 11 1 45 ...
##  $ P802       : Factor w/ 47 levels "El paro","Las drogas",..: 8 8 8 47 8 11 11 8 47 47 ...
##  $ P803       : Factor w/ 47 levels "El paro","Las drogas",..: 47 11 6 47 13 47 16 47 47 47 ...
##  $ P9         : atomic  7 6 6 6 6 6 6 6 3 98 ...
##   ..- attr(*, "value.labels")= Named num  99 98 10 0
##   .. ..- attr(*, "names")= chr  "N.C." "N.S." "10 Se puede confiar en la mayoría de la gente" "0 Nunca se es lo bastante prudente"
##  $ P10        : atomic  6 6 6 6 6 6 5 6 3 5 ...
##   ..- attr(*, "value.labels")= Named num  99 98 10 0
##   .. ..- attr(*, "names")= chr  "N.C." "N.S." "10 La mayoría de la gente procura ayudar a los demás" "0 La mayoría de la gente sólo mira por sí misma"
##  $ P11        : atomic  7 5 8 7 6 5 6 8 3 98 ...
##   ..- attr(*, "value.labels")= Named num  99 98 10 0
##   .. ..- attr(*, "names")= chr  "N.C." "N.S." "10 Máxima seguridad aun perdiendo accesibilidad a la información" "0 Máximo acceso a la información aun perdiendo seguridad"
##  $ P1201      : atomic  9 8 8 98 9 8 9 8 4 8 ...
##   ..- attr(*, "value.labels")= Named num  99 98 10 0
##   .. ..- attr(*, "names")= chr  "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
##  $ P1202      : atomic  9 8 8 98 9 8 9 8 6 10 ...
##   ..- attr(*, "value.labels")= Named num  99 98 10 0
##   .. ..- attr(*, "names")= chr  "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
##  $ P1203      : atomic  9 8 8 98 9 5 9 8 5 8 ...
##   ..- attr(*, "value.labels")= Named num  99 98 10 0
##   .. ..- attr(*, "names")= chr  "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
##  $ P1204      : atomic  9 8 8 98 9 7 9 8 4 98 ...
##   ..- attr(*, "value.labels")= Named num  99 98 10 0
##   .. ..- attr(*, "names")= chr  "N.C." "N.S." "10 Totalmente de acuerdo" "0 Totalmente en desacuerdo"
##  $ P1301      : Factor w/ 6 levels "Muy importante",..: 1 2 1 2 2 1 2 2 2 1 ...
##  $ P1302      : Factor w/ 6 levels "Muy importante",..: 1 2 1 2 2 1 5 2 1 1 ...
##  $ P1303      : Factor w/ 6 levels "Muy importante",..: 1 2 1 1 2 1 2 2 1 1 ...
##  $ P1304      : Factor w/ 6 levels "Muy importante",..: 1 2 2 1 2 2 2 2 2 1 ...
##  $ P1305      : Factor w/ 6 levels "Muy importante",..: 1 2 2 3 2 2 2 2 2 1 ...
##  $ P1306      : Factor w/ 6 levels "Muy importante",..: 2 2 1 2 2 2 5 2 2 1 ...
##  $ P1307      : Factor w/ 6 levels "Muy importante",..: 2 2 2 2 2 2 2 2 1 1 ...
##  $ P1308      : Factor w/ 6 levels "Muy importante",..: 2 2 2 2 2 2 2 2 1 1 ...
##  $ P1309      : Factor w/ 6 levels "Muy importante",..: 2 2 2 2 2 2 2 2 2 1 ...
##  $ P1401      : Factor w/ 9 levels "Totalmente satisfactoria",..: 1 2 1 6 1 5 2 6 1 6 ...
##  $ P1402      : Factor w/ 9 levels "Totalmente satisfactoria",..: 1 2 1 6 1 5 6 6 1 6 ...
##  $ P1403      : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 1 2 1 2 2 2 1 2 ...
##  $ P1404      : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 1 2 1 2 6 2 1 2 ...
##  $ P1405      : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 2 1 2 1 2 6 2 2 1 ...
##  $ P1406      : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 2 1 2 1 2 6 2 2 6 ...
##  $ P1407      : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 6 6 2 6 2 6 6 6 1 ...
##  $ P1408      : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 6 6 6 6 5 6 6 6 6 ...
##  $ P1409      : Factor w/ 9 levels "Totalmente satisfactoria",..: 6 2 2 6 6 5 6 6 2 6 ...
##  $ P1410      : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 2 2 2 3 2 3 2 6 ...
##  $ P1411      : Factor w/ 9 levels "Totalmente satisfactoria",..: 2 2 2 3 2 3 2 3 3 2 ...
##  $ P1501      : Factor w/ 7 levels "Muy necesario",..: 1 2 3 2 1 2 3 1 3 1 ...
##  $ P1502      : Factor w/ 7 levels "Muy necesario",..: 2 1 1 2 1 1 3 1 2 2 ...
##  $ P1503      : Factor w/ 7 levels "Muy necesario",..: 2 1 1 3 1 1 3 2 1 2 ...
##  $ P1504      : Factor w/ 7 levels "Muy necesario",..: 1 3 1 6 1 4 3 2 3 2 ...
##  $ P1505      : Factor w/ 7 levels "Muy necesario",..: 1 3 4 5 3 4 3 4 3 5 ...
##  $ P1506      : Factor w/ 7 levels "Muy necesario",..: 1 3 3 5 1 4 3 2 3 5 ...
##  $ P1507      : Factor w/ 7 levels "Muy necesario",..: 1 3 2 5 2 4 3 3 3 5 ...
##  $ P1601      : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 1 1 1 1 1 ...
##  $ P1602      : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 2 1 1 1 2 ...
##  $ P1603      : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 2 1 1 2 2 ...
##  $ P1604      : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 1 1 2 1 2 1 1 1 3 ...
##  $ P1605      : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 2 1 2 1 2 1 2 1 3 ...
##  $ P1606      : Factor w/ 5 levels "Sí","No","No sabe lo que es",..: 1 2 1 2 1 2 1 2 2 3 ...
##  $ P1701      : Factor w/ 9 levels "Continuamente",..: 1 1 2 NA 2 3 2 2 3 5 ...
##  $ P1702      : Factor w/ 9 levels "Continuamente",..: 5 1 2 NA 2 NA 2 1 2 NA ...
##  $ P1703      : Factor w/ 9 levels "Continuamente",..: 1 1 2 NA 2 NA 2 1 NA NA ...
##  $ P1704      : Factor w/ 9 levels "Continuamente",..: 1 1 4 NA 2 NA 2 2 3 NA ...
##  $ P1705      : Factor w/ 9 levels "Continuamente",..: 1 NA 7 NA 3 NA 7 NA 3 NA ...
##  $ P1706      : Factor w/ 9 levels "Continuamente",..: 1 NA 6 NA 3 NA 7 NA NA NA ...
##  $ P1801      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 1 1 NA 1 4 1 1 1 4 ...
##  $ P1802      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 1 1 NA 1 4 1 1 1 3 ...
##  $ P1803      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 2 2 1 NA 1 4 1 1 2 4 ...
##  $ P1804      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 2 2 2 NA 2 4 2 2 2 4 ...
##  $ P1805      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 2 2 NA 2 4 2 2 2 4 ...
##  $ P1806      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 1 1 NA 2 4 1 2 1 4 ...
##  $ P1807      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 1 2 2 NA 2 4 2 2 2 4 ...
##  $ P1808      : Factor w/ 6 levels "Sí","No","No sabe lo que es",..: 2 2 2 NA 2 4 2 2 2 4 ...
##  $ P1901      : Factor w/ 8 levels "Mucho","Bastante",..: 4 4 3 NA 3 4 4 3 3 4 ...
##  $ P1902      : Factor w/ 8 levels "Mucho","Bastante",..: 5 4 4 NA 3 4 5 3 3 6 ...
##  $ P1903      : Factor w/ 8 levels "Mucho","Bastante",..: 2 2 4 NA 2 4 3 3 2 3 ...
##  $ P1904      : Factor w/ 8 levels "Mucho","Bastante",..: 2 3 3 NA 2 4 3 3 3 6 ...
##  $ P1905      : Factor w/ 8 levels "Mucho","Bastante",..: 3 3 4 NA 3 4 3 3 4 6 ...
##  $ P1906      : Factor w/ 8 levels "Mucho","Bastante",..: 4 4 1 NA 3 4 4 3 4 6 ...
##  $ P20        : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: 2 2 2 NA 2 2 2 2 2 3 ...
##  $ P21        : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: 2 2 2 NA 2 2 2 2 2 3 ...
##  $ P22        : Factor w/ 3 levels "Sí","No","N.C.": 2 1 1 1 1 1 2 1 1 1 ...
##  $ P23        : atomic  NA 1 2 2 1 2 NA 3 2 2 ...
##   ..- attr(*, "value.labels")= Named num  99 4 3 2 1
##   .. ..- attr(*, "names")= chr  "N.C." "Cuatro " "Tres " "Dos " ...
##  $ P2401      : atomic  NA 19 0 45 4 35 NA 31 20 98 ...
##   ..- attr(*, "value.labels")= Named num  99 98 97
##   .. ..- attr(*, "names")= chr  "N.C." "N.R." "N.P.(No tiene más hijos/as)"
##  $ P2402      : atomic  NA 97 3 47 97 43 NA 33 25 98 ...
##   ..- attr(*, "value.labels")= Named num  99 98 97
##   .. ..- attr(*, "names")= chr  "N.C." "N.R." "N.P.(No tiene más hijos/as)"
##  $ P2403      : atomic  NA 97 97 97 97 97 NA 37 97 97 ...
##   ..- attr(*, "value.labels")= Named num  99 98 97
##   .. ..- attr(*, "names")= chr  "N.C." "N.R." "N.P.(No tiene más hijos/as)"
##  $ P2404      : atomic  NA 97 97 97 97 97 NA 97 97 97 ...
##   ..- attr(*, "value.labels")= Named num  99 98 97
##   .. ..- attr(*, "names")= chr  "N.C." "N.R." "N.P.(No tiene más hijos/as)"
##  $ P25        : Factor w/ 6 levels "Sí, he hablado de ese tema varias veces con ellos/as ",..: NA 2 NA NA NA NA NA NA 2 NA ...
##  $ P26        : Factor w/ 7 levels "Lo hablamos y tiende a prevalecer la opinión de los padres",..: NA 4 NA NA NA NA NA NA 3 NA ...
##  $ P2701      : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 2 NA NA NA 2 NA 2 1 NA ...
##  $ P2702      : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 1 NA NA NA 3 NA 1 1 NA ...
##  $ P2703      : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 1 NA NA NA 3 NA 2 1 NA ...
##  $ P2704      : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 2 NA NA NA 3 NA 2 1 NA ...
##  $ P2705      : Factor w/ 5 levels "Sí","No","(NO LEER) No procede (no usa esa TIC)",..: NA 2 NA NA NA 3 NA 2 1 NA ...
##   [list output truncated]
##  - attr(*, "variable.labels")= Named chr  "" "" "Comunidad autónoma" "Provincia" ...
##   ..- attr(*, "names")= chr  "ESTU" "CUES" "CCAA" "PROV" ...
##  - attr(*, "codepage")= int 1252

datos00 <- datos[,52:58]
datos00 <- as.data.frame(sapply(datos00, as.numeric))

f <- cbind(P1501,P1502,P1503,P1504,P1505,P1506,P1507)~1

set.seed(10)

v.lca <- poLCA(formula=f, na.rm=TRUE, data=datos00, nclass=2)

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $P1501
##            Pr(1)  Pr(2)  Pr(3)  Pr(4) Pr(5)  Pr(6)  Pr(7)
## class 1:  0.4851 0.4229 0.0871 0.0044     0 0.0005 0.0000
## class 2:  0.1439 0.3399 0.2995 0.2103     0 0.0032 0.0032
## 
## $P1502
##            Pr(1)  Pr(2)  Pr(3)  Pr(4) Pr(5)  Pr(6)  Pr(7)
## class 1:  0.3597 0.4407 0.1810 0.0187 0.000 0.0000 0.0000
## class 2:  0.0034 0.0820 0.1394 0.7385 0.008 0.0223 0.0064
## 
## $P1503
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.4568 0.4255 0.1135 0.0006 0.0000 0.0010 0.0027
## class 2:  0.0043 0.0947 0.1327 0.7105 0.0271 0.0242 0.0064
## 
## $P1504
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.3197 0.4071 0.2331 0.0294 0.0029 0.0046 0.0032
## class 2:  0.0000 0.0214 0.0911 0.7754 0.0776 0.0295 0.0049
## 
## $P1505
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.1212 0.2324 0.4494 0.1844 0.0023 0.0076 0.0027
## class 2:  0.0016 0.0022 0.0195 0.8176 0.1320 0.0222 0.0049
## 
## $P1506
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.3096 0.4174 0.2217 0.0430 0.0032 0.0030 0.0021
## class 2:  0.0107 0.0724 0.0688 0.6989 0.1214 0.0229 0.0049
## 
## $P1507
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.1268 0.2251 0.4179 0.2080 0.0110 0.0091 0.0021
## class 2:  0.0138 0.0078 0.0151 0.7832 0.1541 0.0210 0.0049
## 
## Estimated class population shares 
##  0.7465 0.2535 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.7488 0.2512 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 2476 
## number of estimated parameters: 85 
## residual degrees of freedom: 2391 
## maximum log-likelihood: -20683.16 
##  
## AIC(2): 41536.33
## BIC(2): 42030.55
## G^2(2): 12512.4 (Likelihood ratio/deviance statistic) 
## X^2(2): 8.244755e+13 (Chi-square goodness of fit) 
##

plot(v.lca)

v.lca <- poLCA(formula= f, na.rm=TRUE, data=datos00, nclass=3)

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $P1501
##            Pr(1)  Pr(2)  Pr(3)  Pr(4) Pr(5)  Pr(6)  Pr(7)
## class 1:  0.8166 0.1574 0.0224 0.0036     0 0.0000 0.0000
## class 2:  0.1486 0.3222 0.2944 0.2347     0 0.0000 0.0000
## class 3:  0.2153 0.6196 0.1525 0.0080     0 0.0027 0.0018
## 
## $P1502
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.7843 0.1484 0.0530 0.0143 0.0000 0.0000 0.0000
## class 2:  0.0062 0.0323 0.1050 0.8331 0.0092 0.0124 0.0018
## class 3:  0.0216 0.6524 0.2884 0.0284 0.0000 0.0065 0.0027
## 
## $P1503
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.9591 0.0357 0.0028 0.0011 0.0000 0.0000 0.0013
## class 2:  0.0076 0.0401 0.0957 0.8115 0.0312 0.0124 0.0016
## class 3:  0.0545 0.7131 0.2139 0.0029 0.0000 0.0092 0.0064
## 
## $P1504
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.7046 0.1962 0.0813 0.0125 0.0018 0.0012 0.0024
## class 2:  0.0000 0.0035 0.0451 0.8591 0.0878 0.0045 0.0000
## class 3:  0.0148 0.5419 0.3562 0.0556 0.0041 0.0212 0.0063
## 
## $P1505
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.2682 0.2537 0.3345 0.1368 0.0025 0.0030 0.0014
## class 2:  0.0018 0.0019 0.0099 0.8452 0.1398 0.0014 0.0000
## class 3:  0.0048 0.2001 0.5066 0.2522 0.0078 0.0223 0.0062
## 
## $P1506
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.5846 0.2811 0.1000 0.0293 0.0025 0.0000 0.0025
## class 2:  0.0116 0.0537 0.0529 0.7512 0.1269 0.0037 0.0000
## class 3:  0.0861 0.5009 0.3073 0.0755 0.0096 0.0161 0.0045
## 
## $P1507
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)  Pr(5)  Pr(6)  Pr(7)
## class 1:  0.2629 0.2503 0.3259 0.1548 0.0049 0.0012 0.0000
## class 2:  0.0147 0.0073 0.0093 0.8062 0.1607 0.0018 0.0000
## class 3:  0.0185 0.1910 0.4585 0.2779 0.0227 0.0251 0.0063
## 
## Estimated class population shares 
##  0.3293 0.2204 0.4503 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3308 0.2217 0.4475 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 2476 
## number of estimated parameters: 128 
## residual degrees of freedom: 2348 
## maximum log-likelihood: -18646.23 
##  
## AIC(3): 37548.45
## BIC(3): 38292.69
## G^2(3): 8438.526 (Likelihood ratio/deviance statistic) 
## X^2(3): 1.065842e+14 (Chi-square goodness of fit) 
##

plot(v.lca)

datos$clases <- v.lca$predclass

library(lattice)

datos$P48<-as.numeric(as.character(datos$P48))
bwplot(P48~as.factor(clases), data = datos, ylab="EDAD")

histogram(~as.factor(P11) | as.factor(clases), data=datos, 
          xlab="(+) Acceso - (+) Seguridad",
          ylab="Porcentaje total")

datos01<-subset(datos, subset = P11<98)
tapply(X=datos01$P11, INDEX = datos01$clases, FUN = mean)

##        1        2        3 
## 5.175505 6.561321 5.493732

histogram(~as.factor(P1204) | as.factor(clases), data=datos,
          xlab="(+) desacuerdo - (+) acuerdo",
          ylab="Porcentaje total")

datos02<-subset(datos, subset = P1204<98)
tapply(X=datos02$P1204, INDEX = datos02$clases, FUN = mean)

##        1        2        3 
## 3.335006 5.071078 4.208780

#Fuente de los datos:
  
# Centro de Investigaciones Sociológicas. (CIS).
#Título: BARÓMETRO DE MARZO 2015.
#Número: 3057.
#Fecha: 02-03-2015.
#Fecha de descarga: 10 de marzo de 2016.


## Replication of latent class models in Agresti (2002, p. 543),
## Table 13.2 and Table 13.3.




#Diagnoses of carcinoma (sample data)

data(carcinoma)
f <- cbind(A,B,C,D,E,F,G)~1
lca2 <- poLCA(f,carcinoma,nclass=2) # log-likelihood: -317.2568

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.8835 0.1165
## class 2:  0.0000 1.0000
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.6456 0.3544
## class 2:  0.0169 0.9831
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.2391 0.7609
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.4589 0.5411
## 
## $E
##            Pr(1)  Pr(2)
## class 1:  0.7771 0.2229
## class 2:  0.0214 0.9786
## 
## $F
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.5773 0.4227
## 
## $G
##            Pr(1)  Pr(2)
## class 1:  0.8835 0.1165
## class 2:  0.0000 1.0000
## 
## Estimated class population shares 
##  0.4988 0.5012 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.5 0.5 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 118 
## number of estimated parameters: 15 
## residual degrees of freedom: 103 
## maximum log-likelihood: -317.2568 
##  
## AIC(2): 664.5137
## BIC(2): 706.0739
## G^2(2): 62.36543 (Likelihood ratio/deviance statistic) 
## X^2(2): 92.64814 (Chi-square goodness of fit) 
##

lca3 <- poLCA(f,carcinoma,nclass=3) # log-likelihood: -293.705

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.4872 0.5128
## class 3:  0.9427 0.0573
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.0191 0.9809
## class 2:  0.0000 1.0000
## class 3:  0.8621 0.1379
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.1425 0.8575
## class 2:  1.0000 0.0000
## class 3:  1.0000 0.0000
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.4138 0.5862
## class 2:  0.9424 0.0576
## class 3:  1.0000 0.0000
## 
## $E
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.2494 0.7506
## class 3:  0.9449 0.0551
## 
## $F
##            Pr(1)  Pr(2)
## class 1:  0.5236 0.4764
## class 2:  1.0000 0.0000
## class 3:  1.0000 0.0000
## 
## $G
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.3693 0.6307
## class 3:  1.0000 0.0000
## 
## Estimated class population shares 
##  0.4447 0.1817 0.3736 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.4322 0.1949 0.3729 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 118 
## number of estimated parameters: 23 
## residual degrees of freedom: 95 
## maximum log-likelihood: -293.705 
##  
## AIC(3): 633.41
## BIC(3): 697.1357
## G^2(3): 15.26171 (Likelihood ratio/deviance statistic) 
## X^2(3): 20.50336 (Chi-square goodness of fit) 
##

lca4 <- poLCA(f,carcinoma,nclass=4,nrep=10,maxiter=5000) # log-likelihood: -289.2858

## Model 1: llik = -289.7889 ... best llik = -289.7889
## Model 2: llik = -292.493 ... best llik = -289.7889
## Model 3: llik = -291.2649 ... best llik = -289.7889
## Model 4: llik = -289.2858 ... best llik = -289.2858
## Model 5: llik = -289.7889 ... best llik = -289.2858
## Model 6: llik = -289.7889 ... best llik = -289.2858
## Model 7: llik = -291.2649 ... best llik = -289.2858
## Model 8: llik = -292.493 ... best llik = -289.2858
## Model 9: llik = -289.2858 ... best llik = -289.2858
## Model 10: llik = -289.2858 ... best llik = -289.2858
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.0000 1.0000
## class 3:  0.4634 0.5366
## class 4:  0.9422 0.0578
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.0905 0.9095
## class 3:  0.0000 1.0000
## class 4:  0.8584 0.1416
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.1561 0.8439
## class 2:  0.0186 0.9814
## class 3:  1.0000 0.0000
## class 4:  1.0000 0.0000
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.2421 0.7579
## class 2:  1.0000 0.0000
## class 3:  0.9404 0.0596
## class 4:  1.0000 0.0000
## 
## $E
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.0000 1.0000
## class 3:  0.2341 0.7659
## class 4:  0.9443 0.0557
## 
## $F
##            Pr(1)  Pr(2)
## class 1:  0.3823 0.6177
## class 2:  1.0000 0.0000
## class 3:  1.0000 0.0000
## class 4:  1.0000 0.0000
## 
## $G
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.0000 1.0000
## class 3:  0.3482 0.6518
## class 4:  1.0000 0.0000
## 
## Estimated class population shares 
##  0.343 0.0936 0.1882 0.3751 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3136 0.1186 0.1949 0.3729 
##  
## ========================================================= 
## Fit for 4 latent classes: 
## ========================================================= 
## number of observations: 118 
## number of estimated parameters: 31 
## residual degrees of freedom: 87 
## maximum log-likelihood: -289.2858 
##  
## AIC(4): 640.5717
## BIC(4): 726.4629
## G^2(4): 6.423452 (Likelihood ratio/deviance statistic) 
## X^2(4): 10.08438 (Chi-square goodness of fit) 
##

## Example 1. Two-class LCA. (Table 3.3, p. 32)
##PAM y el engaño crónico (datos de la muestra)


data(cheating)
f <- cbind(LIEEXAM,LIEPAPER,FRAUD,COPYEXAM)~1
ch2 <- poLCA(f,cheating,nclass=2) # log-likelihood: -440.0271

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $LIEEXAM
##            Pr(1)  Pr(2)
## class 1:  0.4231 0.5769
## class 2:  0.9834 0.0166
## 
## $LIEPAPER
##            Pr(1)  Pr(2)
## class 1:  0.4109 0.5891
## class 2:  0.9708 0.0292
## 
## $FRAUD
##            Pr(1)  Pr(2)
## class 1:  0.7840 0.2160
## class 2:  0.9629 0.0371
## 
## $COPYEXAM
##            Pr(1)  Pr(2)
## class 1:  0.6236 0.3764
## class 2:  0.8181 0.1819
## 
## Estimated class population shares 
##  0.1606 0.8394 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.1693 0.8307 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 319 
## number of estimated parameters: 9 
## residual degrees of freedom: 6 
## maximum log-likelihood: -440.0271 
##  
## AIC(2): 898.0542
## BIC(2): 931.9409
## G^2(2): 7.764242 (Likelihood ratio/deviance statistic) 
## X^2(2): 8.3234 (Chi-square goodness of fit) 
##

##
## Example 2. Two-class latent class regression using
## GPA as a covariate to predict class membership as
## "cheaters" vs. "non-cheaters".
## (Table 7.1, p. 85, and Figure 7.1, p. 86)
##
f2 <- cbind(LIEEXAM,LIEPAPER,FRAUD,COPYEXAM)~GPA
ch2c <- poLCA(f2,cheating,nclass=2) # log-likelihood: -429.6384

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $LIEEXAM
##            Pr(1)  Pr(2)
## class 1:  0.9903 0.0097
## class 2:  0.4389 0.5611
## 
## $LIEPAPER
##            Pr(1)  Pr(2)
## class 1:  0.9647 0.0353
## class 2:  0.4858 0.5142
## 
## $FRAUD
##            Pr(1)  Pr(2)
## class 1:  0.9655 0.0345
## class 2:  0.7850 0.2150
## 
## $COPYEXAM
##            Pr(1)  Pr(2)
## class 1:  0.8257 0.1743
## class 2:  0.5925 0.4075
## 
## Estimated class population shares 
##  0.8219 0.1781 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.8508 0.1492 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)     0.11343     0.50992    0.222     0.833
## GPA            -0.84248     0.28131   -2.995     0.030
## ========================================================= 
## number of observations: 315 
## number of estimated parameters: 10 
## residual degrees of freedom: 5 
## maximum log-likelihood: -429.6384 
##  
## AIC(2): 879.2768
## BIC(2): 916.8025
## X^2(2): 8.641731 (Chi-square goodness of fit) 
##  
## ALERT: estimation algorithm automatically restarted with new initial values 
##

GPAmat <- cbind(1,c(1:5))
exb <- exp(GPAmat %*% ch2c$coeff)
matplot(c(1:5),cbind(1/(1+exb),exb/(1+exb)),type="l",lwd=2,
        main="GPA as a predictor of persistent cheating",
        xlab="GPA category, low to high",
        ylab="Probability of latent class membership")

text(1.7,0.3,"Cheaters")
text(1.7,0.7,"Non-cheaters")
##
## Compare results from Example 1 to Example 2.
## Non-simultaneous estimation of effect of GPA on latent class
## membership biases the estimated effect in Example 1.
##
cheatcl <- which.min(ch2$P)
predcc <- sapply(c(1:5),function(v) mean(ch2$posterior[cheating$GPA==v,cheatcl],na.rm=TRUE))
## Having run Ex.2, add to plot:
matplot(c(1:5),cbind(1-predcc,predcc),type="l",lwd=2,add=TRUE)
text(4,0.14,"Cheaters\n (non-simul. estimate)")
text(4,0.87,"Non-cheaters\n (non-simul. estimate)")

#2000 National Election Studies survey (sample data)

# Latent class models with one (loglinear independence) to three classes

data(election)

f <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
           MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~1
nes1 <- poLCA(f,election,nclass=1) # log-likelihood: -18647.31

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $MORALG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.2578       0.4783         0.1808            0.0831
## 
## $CARESG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1594       0.4272         0.2807            0.1327
## 
## $KNOWG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.2723       0.5622         0.1297            0.0359
## 
## $LEADG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:             0.148       0.4249         0.3143            0.1129
## 
## $DISHONG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0877       0.1869         0.3898            0.3356
## 
## $INTELG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:             0.286       0.5667         0.1098            0.0374
## 
## $MORALB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:             0.209       0.5195         0.2136             0.058
## 
## $CARESB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0999       0.3791         0.3349            0.1861
## 
## $KNOWB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1594       0.5454         0.2174            0.0778
## 
## $LEADB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1632       0.5072         0.2426             0.087
## 
## $DISHONB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0427         0.18         0.4218            0.3555
## 
## $INTELB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1937       0.5545         0.1869            0.0648
## 
## Estimated class population shares 
##  1 
##  
## Predicted class memberships (by modal posterior prob.) 
##  1 
##  
## ========================================================= 
## Fit for 1 latent classes: 
## ========================================================= 
## number of observations: 1311 
## number of estimated parameters: 36 
## residual degrees of freedom: 1275 
## maximum log-likelihood: -18647.31 
##  
## AIC(1): 37366.62
## BIC(1): 37553.05
## G^2(1): 18882 (Likelihood ratio/deviance statistic) 
## X^2(1): 29318840470 (Chi-square goodness of fit) 
##

nes2 <- poLCA(f,election,nclass=2) # log-likelihood: -17344.92

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $MORALG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.4247       0.4944         0.0623            0.0186
## class 2:            0.1172       0.4647         0.2806            0.1375
## 
## $CARESG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.2982       0.5298         0.1208            0.0512
## class 2:            0.0426       0.3407         0.4154            0.2014
## 
## $KNOWG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.4354       0.5029         0.0404            0.0214
## class 2:            0.1349       0.6121         0.2049            0.0481
## 
## $LEADG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.2791       0.5508         0.1417            0.0284
## class 2:            0.0375       0.3188         0.4596            0.1840
## 
## $DISHONG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0350        0.068         0.3921             0.505
## class 2:            0.1321        0.287         0.3879             0.193
## 
## $INTELG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.4349       0.4951         0.0454            0.0246
## class 2:            0.1606       0.6271         0.1641            0.0481
## 
## $MORALB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0799       0.4465         0.3564            0.1172
## class 2:            0.3177       0.5809         0.0933            0.0081
## 
## $CARESB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0138       0.1376         0.4693            0.3793
## class 2:            0.1724       0.5825         0.2216            0.0234
## 
## $KNOWB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0579       0.3873         0.3849            0.1699
## class 2:            0.2449       0.6786         0.0763            0.0002
## 
## $LEADB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0259       0.3339         0.4560            0.1842
## class 2:            0.2789       0.6532         0.0628            0.0050
## 
## $DISHONB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0688       0.2796         0.4650            0.1865
## class 2:            0.0208       0.0961         0.3854            0.4977
## 
## $INTELB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0958       0.4191         0.3433            0.1418
## class 2:            0.2763       0.6686         0.0551            0.0000
## 
## Estimated class population shares 
##  0.4572 0.5428 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.4546 0.5454 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 1311 
## number of estimated parameters: 73 
## residual degrees of freedom: 1238 
## maximum log-likelihood: -17344.92 
##  
## AIC(2): 34835.85
## BIC(2): 35213.88
## G^2(2): 16277.22 (Likelihood ratio/deviance statistic) 
## X^2(2): 2.57217e+11 (Chi-square goodness of fit) 
##

nes3 <- poLCA(f,election,nclass=3) # log-likelihood: -16714.66

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $MORALG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1851       0.3668         0.2350            0.2131
## class 2:            0.5224       0.4109         0.0390            0.0277
## class 3:            0.1013       0.5990         0.2552            0.0446
## 
## $CARESG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0879       0.2546         0.3524            0.3051
## class 2:            0.3970       0.4605         0.0895            0.0529
## class 3:            0.0227       0.5090         0.3819            0.0864
## 
## $KNOWG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.2542       0.4253         0.2382            0.0822
## class 2:            0.5878       0.3543         0.0266            0.0312
## class 3:            0.0429       0.8058         0.1408            0.0105
## 
## $LEADG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0747       0.2582         0.3856            0.2816
## class 2:            0.3747       0.4772         0.1085            0.0396
## class 3:            0.0207       0.4886         0.4268            0.0639
## 
## $DISHONG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.2007       0.3060         0.2774            0.2158
## class 2:            0.0426       0.0487         0.3371            0.5717
## class 3:            0.0519       0.2182         0.4998            0.2301
## 
## $INTELG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.2936       0.4407         0.1852            0.0805
## class 2:            0.5763       0.3698         0.0197            0.0341
## class 3:            0.0600       0.7953         0.1317            0.0130
## 
## $MORALB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.5797       0.3787         0.0248            0.0168
## class 2:            0.0966       0.3795         0.3638            0.1601
## class 3:            0.0642       0.7137         0.2164            0.0057
## 
## $CARESB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.3483       0.5362         0.0836            0.0319
## class 2:            0.0162       0.1125         0.4045            0.4668
## class 3:            0.0093       0.4847         0.4380            0.0679
## 
## $KNOWB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.4970       0.4899         0.0132            0.0000
## class 2:            0.0682       0.3115         0.3828            0.2374
## class 3:            0.0191       0.7583         0.2182            0.0045
## 
## $LEADB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.5106       0.4648         0.0187            0.0058
## class 2:            0.0236       0.2735         0.4524            0.2505
## class 3:            0.0537       0.7119         0.2217            0.0127
## 
## $DISHONB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0365       0.0740         0.1993            0.6902
## class 2:            0.0862       0.3096         0.4111            0.1931
## class 3:            0.0134       0.1471         0.5683            0.2711
## 
## $INTELB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.5284       0.4656         0.0060            0.0000
## class 2:            0.1179       0.3391         0.3440            0.1990
## class 3:            0.0435       0.7741         0.1796            0.0028
## 
## Estimated class population shares 
##  0.2608 0.3198 0.4194 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.2578 0.3166 0.4256 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 1311 
## number of estimated parameters: 110 
## residual degrees of freedom: 1201 
## maximum log-likelihood: -16714.66 
##  
## AIC(3): 33649.32
## BIC(3): 34218.96
## G^2(3): 15016.7 (Likelihood ratio/deviance statistic) 
## X^2(3): 16748924926 (Chi-square goodness of fit) 
##

# Three-class model with a single covariate (party)
f2a <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
             MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~PARTY
nes2a <- poLCA(f2a,election,nclass=3,nrep=5) # log-likelihood: -16222.32

## Model 1: llik = -16222.32 ... best llik = -16222.32
## Model 2: llik = -16222.32 ... best llik = -16222.32
## Model 3: llik = -16222.32 ... best llik = -16222.32
## Model 4: llik = -16227.43 ... best llik = -16222.32
## Model 5: llik = -16222.32 ... best llik = -16222.32
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $MORALG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1373       0.6682         0.1802            0.0143
## class 2:            0.6221       0.3350         0.0172            0.0258
## class 3:            0.1081       0.3832         0.3038            0.2048
## 
## $CARESG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0388       0.6138         0.2886            0.0589
## class 2:            0.4858       0.4164         0.0534            0.0444
## class 3:            0.0356       0.2281         0.4501            0.2861
## 
## $KNOWG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0712       0.8173         0.1025             0.009
## class 2:            0.7189       0.2451         0.0040             0.032
## class 3:            0.1436       0.5327         0.2556             0.068
## 
## $LEADG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0256       0.6280         0.3144            0.0320
## class 2:            0.4720       0.4326         0.0643            0.0311
## class 3:            0.0278       0.1899         0.5137            0.2685
## 
## $DISHONG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0210       0.1412         0.5341            0.3037
## class 2:            0.0518       0.0538         0.2890            0.6054
## class 3:            0.1876       0.3435         0.3078            0.1611
## 
## $INTELG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0698       0.8159         0.1003            0.0140
## class 2:            0.7381       0.2219         0.0089            0.0311
## class 3:            0.1704       0.5597         0.2000            0.0698
## 
## $MORALB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0310       0.6326         0.3003            0.0361
## class 2:            0.1610       0.3749         0.3163            0.1478
## class 3:            0.4477       0.5135         0.0313            0.0075
## 
## $CARESB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0047       0.3274         0.5083            0.1597
## class 2:            0.0458       0.1490         0.3780            0.4272
## class 3:            0.2516       0.6185         0.1097            0.0202
## 
## $KNOWB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0121       0.6511         0.2920            0.0447
## class 2:            0.1300       0.3503         0.2989            0.2209
## class 3:            0.3427       0.5913         0.0660            0.0000
## 
## $LEADB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0301       0.5884         0.3294            0.0521
## class 2:            0.0743       0.3035         0.3882            0.2340
## class 3:            0.3850       0.5803         0.0287            0.0060
## 
## $DISHONB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0264       0.1855         0.5931            0.1950
## class 2:            0.0914       0.3117         0.3517            0.2451
## class 3:            0.0163       0.0680         0.2923            0.6234
## 
## $INTELB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0350       0.6646         0.2616            0.0388
## class 2:            0.1877       0.3637         0.2698            0.1787
## class 3:            0.3765       0.5878         0.0357            0.0000
## 
## Estimated class population shares 
##  0.3859 0.2736 0.3405 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3815 0.2769 0.3415 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)     1.16155     0.17989    6.457         0
## PARTY          -0.57436     0.06401   -8.973         0
## ========================================================= 
## 3 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)    -3.81813     0.31109  -12.274         0
## PARTY           0.79327     0.06232   12.728         0
## ========================================================= 
## number of observations: 1300 
## number of estimated parameters: 112 
## residual degrees of freedom: 1188 
## maximum log-likelihood: -16222.32 
##  
## AIC(3): 32668.65
## BIC(3): 33247.7
## X^2(3): 34565233197 (Chi-square goodness of fit) 
##  
## ALERT: estimation algorithm automatically restarted with new initial values 
##

pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)
matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
        main="Party ID as a predictor of candidate affinity class",
        xlab="Party ID: strong Democratic (1) to strong Republican (7)",
        ylab="Probability of latent class membership",lwd=2,col=1)
text(5.9,0.35,"Other")
text(5.4,0.7,"Bush affinity")
text(1.8,0.6,"Gore affinity")

# 1982 General Social Survey (sample data)
data(gss82)
f <- cbind(PURPOSE,ACCURACY,UNDERSTA,COOPERAT)~1
gss.lc2 <- poLCA(f,gss82,nclass=2) # log-likelihood = -2783.268

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $PURPOSE
##             Good Depends Waste of time
## class 1:  0.8953  0.0579        0.0468
## class 2:  0.2154  0.2066        0.5780
## 
## $ACCURACY
##           Mostly true Not true
## class 1:       0.6367   0.3633
## class 2:       0.0297   0.9703
## 
## $UNDERSTA
##             Good Fair/Poor
## class 1:  0.8327    0.1673
## class 2:  0.7422    0.2578
## 
## $COOPERAT
##           Interested Cooperative Impatient
## class 1:      0.8840      0.1043    0.0117
## class 2:      0.6478      0.2498    0.1024
## 
## Estimated class population shares 
##  0.8077 0.1923 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.8136 0.1864 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 1202 
## number of estimated parameters: 13 
## residual degrees of freedom: 22 
## maximum log-likelihood: -2783.268 
##  
## AIC(2): 5592.536
## BIC(2): 5658.729
## G^2(2): 79.33723 (Likelihood ratio/deviance statistic) 
## X^2(2): 93.25329 (Chi-square goodness of fit) 
##

# Could also try:
# gss.lc3 <- poLCA(f,gss82,nclass=3,maxiter=3000,nrep=10) # log-likelihood = -2754.545
# gss.lc4 <- poLCA(f,gss82,nclass=4,maxiter=15000,nrep=10,tol=1e-7) # log-likelihood = -2746.621




#Latent class analysis of polytomous outcome variables

data(values)
f <- cbind(A,B,C,D)~1
M0 <- poLCA(f,values,nclass=1) # log-likelihood: -543.6498

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.2083 0.7917
## 
## $B
##           Pr(1) Pr(2)
## class 1:    0.5   0.5
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.4861 0.5139
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.6898 0.3102
## 
## Estimated class population shares 
##  1 
##  
## Predicted class memberships (by modal posterior prob.) 
##  1 
##  
## ========================================================= 
## Fit for 1 latent classes: 
## ========================================================= 
## number of observations: 216 
## number of estimated parameters: 4 
## residual degrees of freedom: 11 
## maximum log-likelihood: -543.6498 
##  
## AIC(1): 1095.3
## BIC(1): 1108.801
## G^2(1): 81.08423 (Likelihood ratio/deviance statistic) 
## X^2(1): 104.1071 (Chi-square goodness of fit) 
##

M1 <- poLCA(f,values,nclass=2) # log-likelihood: -504.4677

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.2864 0.7136
## class 2:  0.0068 0.9932
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.6704 0.3296
## class 2:  0.0602 0.9398
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.6460 0.3540
## class 2:  0.0735 0.9265
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.8676 0.1324
## class 2:  0.2309 0.7691
## 
## Estimated class population shares 
##  0.7208 0.2792 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.6713 0.3287 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 216 
## number of estimated parameters: 9 
## residual degrees of freedom: 6 
## maximum log-likelihood: -504.4677 
##  
## AIC(2): 1026.935
## BIC(2): 1057.313
## G^2(2): 2.719922 (Likelihood ratio/deviance statistic) 
## X^2(2): 2.719764 (Chi-square goodness of fit) 
##

M2 <- poLCA(f,values,nclass=3,maxiter=8000) # log-likelihood: -503.3011

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.5320 0.4680
## class 2:  0.1599 0.8401
## class 3:  0.0024 0.9976
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.9132 0.0868
## class 2:  0.5097 0.4903
## class 3:  0.0218 0.9782
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.7335 0.2665
## class 2:  0.5573 0.4427
## class 3:  0.0031 0.9969
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.9266 0.0734
## class 2:  0.8024 0.1976
## class 3:  0.0938 0.9062
## 
## Estimated class population shares 
##  0.2132 0.5906 0.1962 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.1435 0.662 0.1944 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 216 
## number of estimated parameters: 14 
## residual degrees of freedom: 1 
## maximum log-likelihood: -503.3011 
##  
## AIC(3): 1034.602
## BIC(3): 1081.856
## G^2(3): 0.3868562 (Likelihood ratio/deviance statistic) 
## X^2(3): 0.4225485 (Chi-square goodness of fit) 
##

##
## Three-class model with a single covariate.
##


data(election)
f2a <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
             MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~PARTY
nes2a <- poLCA(f2a,election,nclass=3,nrep=5) # log-likelihood: -16222.32

## Model 1: llik = -16222.32 ... best llik = -16222.32
## Model 2: llik = -16222.32 ... best llik = -16222.32
## Model 3: llik = -16222.32 ... best llik = -16222.32
## Model 4: llik = -16222.32 ... best llik = -16222.32
## Model 5: llik = -16222.32 ... best llik = -16222.32
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $MORALG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1373       0.6682         0.1802            0.0143
## class 2:            0.6221       0.3350         0.0172            0.0258
## class 3:            0.1081       0.3832         0.3038            0.2048
## 
## $CARESG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0388       0.6138         0.2886            0.0589
## class 2:            0.4858       0.4164         0.0534            0.0444
## class 3:            0.0356       0.2281         0.4501            0.2861
## 
## $KNOWG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0712       0.8173         0.1025             0.009
## class 2:            0.7189       0.2451         0.0040             0.032
## class 3:            0.1436       0.5327         0.2556             0.068
## 
## $LEADG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0256       0.6280         0.3144            0.0320
## class 2:            0.4720       0.4326         0.0643            0.0311
## class 3:            0.0278       0.1899         0.5137            0.2685
## 
## $DISHONG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0210       0.1412         0.5341            0.3037
## class 2:            0.0518       0.0538         0.2890            0.6054
## class 3:            0.1876       0.3435         0.3078            0.1611
## 
## $INTELG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0698       0.8159         0.1003            0.0140
## class 2:            0.7381       0.2219         0.0089            0.0311
## class 3:            0.1704       0.5597         0.2000            0.0698
## 
## $MORALB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0310       0.6326         0.3003            0.0361
## class 2:            0.1610       0.3749         0.3163            0.1478
## class 3:            0.4477       0.5135         0.0313            0.0075
## 
## $CARESB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0047       0.3274         0.5083            0.1597
## class 2:            0.0458       0.1490         0.3780            0.4272
## class 3:            0.2516       0.6185         0.1097            0.0202
## 
## $KNOWB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0121       0.6511         0.2920            0.0447
## class 2:            0.1300       0.3503         0.2989            0.2209
## class 3:            0.3427       0.5913         0.0660            0.0000
## 
## $LEADB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0301       0.5884         0.3294            0.0521
## class 2:            0.0743       0.3035         0.3882            0.2340
## class 3:            0.3850       0.5803         0.0287            0.0060
## 
## $DISHONB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0264       0.1855         0.5931            0.1950
## class 2:            0.0914       0.3117         0.3517            0.2451
## class 3:            0.0163       0.0680         0.2923            0.6234
## 
## $INTELB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0350       0.6646         0.2616            0.0388
## class 2:            0.1877       0.3637         0.2698            0.1787
## class 3:            0.3765       0.5878         0.0357            0.0000
## 
## Estimated class population shares 
##  0.3859 0.2736 0.3405 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3815 0.2769 0.3415 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)     1.16155     0.17989    6.457         0
## PARTY          -0.57436     0.06401   -8.973         0
## ========================================================= 
## 3 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)    -3.81813     0.31109  -12.274         0
## PARTY           0.79327     0.06232   12.728         0
## ========================================================= 
## number of observations: 1300 
## number of estimated parameters: 112 
## residual degrees of freedom: 1188 
## maximum log-likelihood: -16222.32 
##  
## AIC(3): 32668.65
## BIC(3): 33247.7
## X^2(3): 34565234965 (Chi-square goodness of fit) 
##

pidmat <- cbind(1,c(1:7))
exb <- exp(pidmat %*% nes2a$coeff)
matplot(c(1:7),(cbind(1,exb)/(1+rowSums(exb))),ylim=c(0,1),type="l",
        main="Party ID as a predictor of candidate affinity class",
        xlab="Party ID: strong Democratic (1) to strong Republican (7)",
        ylab="Probability of latent class membership",lwd=2,col=1)
text(5.9,0.35,"Other")
text(5.4,0.7,"Bush affinity")
text(1.8,0.6,"Gore affinity")

#Entropy of a fitted latent class model

data(carcinoma)
f <- cbind(A,B,C,D,E,F,G)~1
lca2 <- poLCA(f,carcinoma,nclass=2) # log-likelihood: -317.2568

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.8835 0.1165
## class 2:  0.0000 1.0000
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.6456 0.3544
## class 2:  0.0169 0.9831
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.2391 0.7609
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.4589 0.5411
## 
## $E
##            Pr(1)  Pr(2)
## class 1:  0.7771 0.2229
## class 2:  0.0214 0.9786
## 
## $F
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.5773 0.4227
## 
## $G
##            Pr(1)  Pr(2)
## class 1:  0.8835 0.1165
## class 2:  0.0000 1.0000
## 
## Estimated class population shares 
##  0.4988 0.5012 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.5 0.5 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 118 
## number of estimated parameters: 15 
## residual degrees of freedom: 103 
## maximum log-likelihood: -317.2568 
##  
## AIC(2): 664.5137
## BIC(2): 706.0739
## G^2(2): 62.36543 (Likelihood ratio/deviance statistic) 
## X^2(2): 92.64814 (Chi-square goodness of fit) 
##

lca3 <- poLCA(f,carcinoma,nclass=3) # log-likelihood: -293.705

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.4872 0.5128
## class 3:  0.9427 0.0573
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.0191 0.9809
## class 2:  0.0000 1.0000
## class 3:  0.8621 0.1379
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.1425 0.8575
## class 2:  1.0000 0.0000
## class 3:  1.0000 0.0000
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.4138 0.5862
## class 2:  0.9424 0.0576
## class 3:  1.0000 0.0000
## 
## $E
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.2494 0.7506
## class 3:  0.9449 0.0551
## 
## $F
##            Pr(1)  Pr(2)
## class 1:  0.5236 0.4764
## class 2:  1.0000 0.0000
## class 3:  1.0000 0.0000
## 
## $G
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.3693 0.6307
## class 3:  1.0000 0.0000
## 
## Estimated class population shares 
##  0.4447 0.1817 0.3736 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.4322 0.1949 0.3729 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 118 
## number of estimated parameters: 23 
## residual degrees of freedom: 95 
## maximum log-likelihood: -293.705 
##  
## AIC(3): 633.41
## BIC(3): 697.1357
## G^2(3): 15.26171 (Likelihood ratio/deviance statistic) 
## X^2(3): 20.50335 (Chi-square goodness of fit) 
##

lca4 <- poLCA(f,carcinoma,nclass=4,nrep=10,maxiter=5000) # log-likelihood: -289.2858

## Model 1: llik = -291.2649 ... best llik = -291.2649
## Model 2: llik = -291.2649 ... best llik = -291.2649
## Model 3: llik = -292.493 ... best llik = -291.2649
## Model 4: llik = -292.493 ... best llik = -291.2649
## Model 5: llik = -292.493 ... best llik = -291.2649
## Model 6: llik = -289.2858 ... best llik = -289.2858
## Model 7: llik = -289.2858 ... best llik = -289.2858
## Model 8: llik = -289.7889 ... best llik = -289.2858
## Model 9: llik = -292.493 ... best llik = -289.2858
## Model 10: llik = -289.7889 ... best llik = -289.2858
## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.0000 1.0000
## class 3:  0.4634 0.5366
## class 4:  0.9422 0.0578
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.0905 0.9095
## class 2:  0.0000 1.0000
## class 3:  0.0000 1.0000
## class 4:  0.8584 0.1416
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.0186 0.9814
## class 2:  0.1561 0.8439
## class 3:  1.0000 0.0000
## class 4:  1.0000 0.0000
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.2421 0.7579
## class 3:  0.9404 0.0596
## class 4:  1.0000 0.0000
## 
## $E
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.0000 1.0000
## class 3:  0.2341 0.7659
## class 4:  0.9443 0.0557
## 
## $F
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.3823 0.6177
## class 3:  1.0000 0.0000
## class 4:  1.0000 0.0000
## 
## $G
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.0000 1.0000
## class 3:  0.3482 0.6518
## class 4:  1.0000 0.0000
## 
## Estimated class population shares 
##  0.0936 0.343 0.1882 0.3751 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.1186 0.3136 0.1949 0.3729 
##  
## ========================================================= 
## Fit for 4 latent classes: 
## ========================================================= 
## number of observations: 118 
## number of estimated parameters: 31 
## residual degrees of freedom: 87 
## maximum log-likelihood: -289.2858 
##  
## AIC(4): 640.5717
## BIC(4): 726.4629
## G^2(4): 6.423452 (Likelihood ratio/deviance statistic) 
## X^2(4): 10.08438 (Chi-square goodness of fit) 
##

# Maximum entropy (if all cases equally dispersed)
log(prod(sapply(lca2$probs,ncol)))

## [1] 4.85203

# Sample entropy ("plug-in" estimator, or MLE)
p.hat <- lca2$predcell$observed/lca2$N
H.hat <- -sum(p.hat * log(p.hat))
H.hat # 2.42

## [1] 2.424357

# Entropy of fitted latent class models
poLCA.entropy(lca2)

## [1] 2.693452

poLCA.entropy(lca3)

## [1] 2.494442

poLCA.entropy(lca4)

## [1] 2.458558

#Posterior probabilities from a latent class model

data(election)

## Basic latent class model with three classes
f1 <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
            MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~1
lc1 <- poLCA(f1,election,nclass=3) # log-likelihood: -16714.66

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $MORALG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.5224       0.4109         0.0390            0.0277
## class 2:            0.1013       0.5990         0.2552            0.0446
## class 3:            0.1851       0.3668         0.2350            0.2131
## 
## $CARESG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.3970       0.4605         0.0895            0.0529
## class 2:            0.0227       0.5090         0.3819            0.0864
## class 3:            0.0879       0.2546         0.3524            0.3051
## 
## $KNOWG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.5878       0.3543         0.0266            0.0312
## class 2:            0.0429       0.8058         0.1408            0.0105
## class 3:            0.2542       0.4253         0.2382            0.0822
## 
## $LEADG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.3747       0.4772         0.1085            0.0396
## class 2:            0.0207       0.4886         0.4268            0.0639
## class 3:            0.0747       0.2582         0.3856            0.2816
## 
## $DISHONG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0426       0.0487         0.3371            0.5717
## class 2:            0.0519       0.2182         0.4998            0.2301
## class 3:            0.2007       0.3060         0.2774            0.2158
## 
## $INTELG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.5763       0.3698         0.0197            0.0341
## class 2:            0.0600       0.7953         0.1317            0.0130
## class 3:            0.2936       0.4407         0.1852            0.0805
## 
## $MORALB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0966       0.3795         0.3638            0.1601
## class 2:            0.0642       0.7137         0.2164            0.0057
## class 3:            0.5797       0.3787         0.0248            0.0168
## 
## $CARESB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0162       0.1125         0.4045            0.4668
## class 2:            0.0093       0.4847         0.4380            0.0679
## class 3:            0.3483       0.5362         0.0836            0.0319
## 
## $KNOWB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0682       0.3115         0.3828            0.2374
## class 2:            0.0191       0.7583         0.2182            0.0045
## class 3:            0.4970       0.4899         0.0132            0.0000
## 
## $LEADB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0236       0.2735         0.4524            0.2505
## class 2:            0.0537       0.7119         0.2217            0.0127
## class 3:            0.5106       0.4648         0.0187            0.0058
## 
## $DISHONB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0862       0.3096         0.4111            0.1931
## class 2:            0.0134       0.1471         0.5683            0.2711
## class 3:            0.0365       0.0740         0.1993            0.6902
## 
## $INTELB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1179       0.3391         0.3440            0.1990
## class 2:            0.0435       0.7741         0.1796            0.0028
## class 3:            0.5284       0.4656         0.0060            0.0000
## 
## Estimated class population shares 
##  0.3198 0.4194 0.2608 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3166 0.4256 0.2578 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 1311 
## number of estimated parameters: 110 
## residual degrees of freedom: 1201 
## maximum log-likelihood: -16714.66 
##  
## AIC(3): 33649.32
## BIC(3): 34218.96
## G^2(3): 15016.7 (Likelihood ratio/deviance statistic) 
## X^2(3): 16748677477 (Chi-square goodness of fit) 
##

# The first observed case
lc1$y[1,]

##           MORALG           CARESG        KNOWG        LEADG        DISHONG
## 1 3 Not too well 1 Extremely well 2 Quite well 2 Quite well 3 Not too well
##         INTELG           MORALB           CARESB        KNOWB        LEADB
## 1 2 Quite well 1 Extremely well 1 Extremely well 2 Quite well 2 Quite well
##             DISHONB       INTELB
## 1 4 Not well at all 2 Quite well

lc1$posterior[1,]

## [1] 0.000831611 0.023509667 0.975658722

poLCA.posterior(lc=lc1,y=as.numeric(lc1$y[1,]))

##              [,1]       [,2]      [,3]
## [1,] 0.0008316111 0.02350969 0.9756587

# A hypothetical case
poLCA.posterior(lc=lc1,y=rep(2,12))

##              [,1]     [,2]        [,3]
## [1,] 0.0003774901 0.996542 0.003080526

# Entering y as a matrix
lc1$posterior[1:10,]

##               [,1]         [,2]         [,3]
##  [1,] 8.316110e-04 2.350967e-02 9.756587e-01
##  [2,] 2.253789e-01 7.728822e-01 1.738889e-03
##  [3,] 3.168471e-03 9.894063e-01 7.425270e-03
##  [4,] 3.042976e-05 6.260698e-02 9.373626e-01
##  [5,] 9.999747e-01 2.533777e-05 0.000000e+00
##  [6,] 1.498978e-03 9.980766e-01 4.244346e-04
##  [7,] 9.999943e-01 2.522438e-06 3.128020e-06
##  [8,] 1.242045e-03 9.985739e-01 1.840717e-04
##  [9,] 6.042419e-03 9.938004e-01 1.571399e-04
## [10,] 3.110120e-01 6.885251e-01 4.629982e-04

poLCA.posterior(lc=lc1,y=mapply(as.numeric,lc1$y[1:10,]))

##               [,1]         [,2]         [,3]
##  [1,] 8.316111e-04 2.350969e-02 9.756587e-01
##  [2,] 2.253790e-01 7.728821e-01 1.738888e-03
##  [3,] 3.168472e-03 9.894063e-01 7.425267e-03
##  [4,] 3.042978e-05 6.260707e-02 9.373625e-01
##  [5,] 9.999747e-01 2.533771e-05 0.000000e+00
##  [6,] 1.498979e-03 9.980766e-01 4.244344e-04
##  [7,] 9.999943e-01 2.522436e-06 3.128021e-06
##  [8,] 1.242046e-03 9.985739e-01 1.840716e-04
##  [9,] 6.042424e-03 9.938004e-01 1.571398e-04
## [10,] 3.110122e-01 6.885248e-01 4.629982e-04

## Latent class regression model with three classes
f2 <- cbind(MORALG,CARESG,KNOWG,LEADG,DISHONG,INTELG,
            MORALB,CARESB,KNOWB,LEADB,DISHONB,INTELB)~AGE+EDUC+GENDER
lc2 <- poLCA(f2,election,nclass=3) # log-likelihood: -16598.38

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $MORALG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.6315       0.3241         0.0174            0.0270
## class 2:            0.1390       0.3632         0.2784            0.2195
## class 3:            0.1186       0.6620         0.2063            0.0132
## 
## $CARESG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.4968       0.4041         0.0546            0.0446
## class 2:            0.0534       0.2452         0.4100            0.2913
## class 3:            0.0347       0.5795         0.3199            0.0659
## 
## $KNOWG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.7254       0.2429         0.0000            0.0318
## class 2:            0.1889       0.4799         0.2537            0.0774
## class 3:            0.0588       0.8195         0.1151            0.0066
## 
## $LEADG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.4805       0.4320         0.0558            0.0318
## class 2:            0.0436       0.2259         0.4548            0.2757
## class 3:            0.0230       0.5791         0.3597            0.0382
## 
## $DISHONG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0538       0.0382         0.2855            0.6225
## class 2:            0.2007       0.3408         0.2756            0.1829
## class 3:            0.0223       0.1599         0.5445            0.2734
## 
## $INTELG
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.7296       0.2298         0.0086            0.0319
## class 2:            0.2225       0.4991         0.2034            0.0750
## class 3:            0.0632       0.8272         0.0974            0.0121
## 
## $MORALB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1339       0.3877         0.3337            0.1448
## class 2:            0.4895       0.4545         0.0390            0.0169
## class 3:            0.0370       0.6529         0.2764            0.0336
## 
## $CARESB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0233       0.1293         0.3998            0.4476
## class 2:            0.2818       0.5873         0.1018            0.0291
## class 3:            0.0038       0.3713         0.4751            0.1499
## 
## $KNOWB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1054       0.3440         0.3235            0.2271
## class 2:            0.3947       0.5491         0.0562            0.0000
## class 3:            0.0125       0.6651         0.2785            0.0438
## 
## $LEADB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0428       0.2993         0.4138            0.2442
## class 2:            0.4319       0.5274         0.0347            0.0060
## class 3:            0.0314       0.6177         0.2984            0.0525
## 
## $DISHONB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.0882       0.3231         0.3657            0.2230
## class 2:            0.0283       0.0790         0.2485            0.6443
## class 3:            0.0247       0.1730         0.5915            0.2108
## 
## $INTELB
##           1 Extremely well 2 Quite well 3 Not too well 4 Not well at all
## class 1:            0.1542       0.3685         0.2880            0.1894
## class 2:            0.4301       0.5396         0.0303            0.0000
## class 3:            0.0357       0.6795         0.2453            0.0395
## 
## Estimated class population shares 
##  0.2572 0.3244 0.4184 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.2579 0.3254 0.4167 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)     1.36386     0.48942    2.787     0.005
## AGE            -0.00966     0.00536   -1.804     0.071
## EDUC            0.00095     0.05635    0.017     0.987
## GENDER         -0.42794     0.17363   -2.465     0.014
## ========================================================= 
## 3 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)     2.10579     0.46292    4.549     0.000
## AGE            -0.02561     0.00519   -4.933     0.000
## EDUC            0.01385     0.05364    0.258     0.796
## GENDER         -0.31634     0.16924   -1.869     0.062
## ========================================================= 
## number of observations: 1303 
## number of estimated parameters: 116 
## residual degrees of freedom: 1187 
## maximum log-likelihood: -16598.38 
##  
## AIC(3): 33428.77
## BIC(3): 34028.77
## X^2(3): 28324231195 (Chi-square goodness of fit) 
##

# Posteriors for case number 97 (poorly classified)
lc2$y[97,]

##           MORALG           CARESG        KNOWG             LEADG
## 136 2 Quite well 1 Extremely well 2 Quite well 4 Not well at all
##               DISHONG            INTELG       MORALB         CARESB
## 136 4 Not well at all 4 Not well at all 2 Quite well 3 Not too well
##            KNOWB        LEADB      DISHONB       INTELB
## 136 2 Quite well 2 Quite well 2 Quite well 2 Quite well

lc2$x[97,]

##     X.Intercept. AGE EDUC GENDER
## 136            1  41    6      1

lc2$posterior[97,]

## [1] 0.2879891 0.2132583 0.4987526

poLCA.posterior(lc=lc2,y=as.numeric(lc2$y[97,]),x=c(41,6,1))

##          [,1]      [,2]      [,3]
## [1,] 0.287989 0.2132584 0.4987526

# If x is not specified, the posterior is calculated using the population average
poLCA.posterior(lc=lc2,y=as.numeric(lc2$y[97,]))

##          [,1]     [,2]      [,3]
## [1,] 0.359807 0.194796 0.4453971

# Entering y and x as matrices
round(lc2$posterior[95:100,],2)

##      [,1] [,2] [,3]
## [1,] 1.00 0.00 0.00
## [2,] 1.00 0.00 0.00
## [3,] 0.29 0.21 0.50
## [4,] 0.00 1.00 0.00
## [5,] 0.00 1.00 0.00
## [6,] 0.98 0.00 0.02

round(poLCA.posterior(lc=lc2,y=mapply(as.numeric,lc2$y[95:100,]),
                      x=as.matrix(lc2$x[95:100,-1])),2)

##      [,1] [,2] [,3]
## [1,] 1.00 0.00 0.00
## [2,] 1.00 0.00 0.00
## [3,] 0.29 0.21 0.50
## [4,] 0.00 1.00 0.00
## [5,] 0.00 1.00 0.00
## [6,] 0.98 0.00 0.02

#Predicted cell percentages in a latent class model

data(carcinoma)
f <- cbind(A,B,C,D,E,F,G)~1
lca3 <- poLCA(f,carcinoma,nclass=3) # log-likelihood: -293.705

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.9427 0.0573
## class 2:  0.4872 0.5128
## class 3:  0.0000 1.0000
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.8621 0.1379
## class 2:  0.0000 1.0000
## class 3:  0.0191 0.9809
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  1.0000 0.0000
## class 3:  0.1425 0.8575
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.9424 0.0576
## class 3:  0.4138 0.5862
## 
## $E
##            Pr(1)  Pr(2)
## class 1:  0.9449 0.0551
## class 2:  0.2494 0.7506
## class 3:  0.0000 1.0000
## 
## $F
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  1.0000 0.0000
## class 3:  0.5236 0.4764
## 
## $G
##            Pr(1)  Pr(2)
## class 1:  1.0000 0.0000
## class 2:  0.3693 0.6307
## class 3:  0.0000 1.0000
## 
## Estimated class population shares 
##  0.3736 0.1817 0.4447 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3729 0.1949 0.4322 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 118 
## number of estimated parameters: 23 
## residual degrees of freedom: 95 
## maximum log-likelihood: -293.705 
##  
## AIC(3): 633.41
## BIC(3): 697.1357
## G^2(3): 15.26171 (Likelihood ratio/deviance statistic) 
## X^2(3): 20.50335 (Chi-square goodness of fit) 
##

# Only 20 out of 32 possible response patterns are observed
lca3$predcell

##    A B C D E F G observed expected
## 1  1 1 1 1 1 1 1       34   33.849
## 2  1 1 1 1 2 1 1        2    1.973
## 3  1 2 1 1 1 1 1        6    6.323
## 4  1 2 1 1 1 1 2        1    1.548
## 5  1 2 1 1 2 1 1        4    3.045
## 6  1 2 1 1 2 1 2        5    4.660
## 7  2 1 1 1 1 1 1        2    2.058
## 8  2 1 2 1 2 1 2        1    0.186
## 9  2 2 1 1 1 1 1        2    1.284
## 10 2 2 1 1 1 1 2        1    1.630
## 11 2 2 1 1 2 1 1        2    2.892
## 12 2 2 1 1 2 1 2        7    6.494
## 13 2 2 1 1 2 2 2        1    1.446
## 14 2 2 1 2 1 1 2        1    0.100
## 15 2 2 1 2 2 1 2        2    2.552
## 16 2 2 1 2 2 2 2        3    2.049
## 17 2 2 2 1 2 1 2       13    9.563
## 18 2 2 2 1 2 2 2        5    8.701
## 19 2 2 2 2 2 1 2       10   13.550
## 20 2 2 2 2 2 2 2       16   12.328

# Produce cell probabilities for one sequence of responses
poLCA.predcell(lc=lca3,y=c(1,1,1,1,1,1,1))

##           [,1]
## [1,] 0.2868564

# Estimated probabilities for a cell with zero observations
poLCA.predcell(lc=lca3,y=c(1,1,1,1,1,1,2))

##              [,1]
## [1,] 7.750976e-25

# Cell probabilities for both cells at once; y entered as a matrix
poLCA.predcell(lc=lca3,y=rbind(c(1,1,1,1,1,1,1),c(1,1,1,1,1,1,2)))

##              [,1]
## [1,] 2.868564e-01
## [2,] 7.750976e-25

#Reorder latent classes in poLCA

## Using the "cheating" sample data set, make the larger
## non-cheater class the first ("reference") class in a
## latent class regression model. The coefficient on GPA
## now maintains a consistent interpretation.
##
data(cheating)
f2 <- cbind(LIEEXAM,LIEPAPER,FRAUD,COPYEXAM)~GPA
lc.ch <- poLCA(f2,cheating,nclass=2,verbose=FALSE)
probs.start.new <- poLCA.reorder(lc.ch$probs.start,order(lc.ch$P,decreasing=TRUE))
lc.ch <- poLCA(f2,cheating,nclass=2,probs.start=probs.start.new)

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $LIEEXAM
##            Pr(1)  Pr(2)
## class 1:  0.9903 0.0097
## class 2:  0.4389 0.5611
## 
## $LIEPAPER
##            Pr(1)  Pr(2)
## class 1:  0.9647 0.0353
## class 2:  0.4858 0.5142
## 
## $FRAUD
##            Pr(1)  Pr(2)
## class 1:  0.9655 0.0345
## class 2:  0.7850 0.2150
## 
## $COPYEXAM
##            Pr(1)  Pr(2)
## class 1:  0.8257 0.1743
## class 2:  0.5925 0.4075
## 
## Estimated class population shares 
##  0.8219 0.1781 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.8508 0.1492 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)     0.11343     0.50992    0.222     0.833
## GPA            -0.84248     0.28131   -2.995     0.030
## ========================================================= 
## number of observations: 315 
## number of estimated parameters: 10 
## residual degrees of freedom: 5 
## maximum log-likelihood: -429.6384 
##  
## AIC(2): 879.2768
## BIC(2): 916.8025
## X^2(2): 8.64173 (Chi-square goodness of fit) 
##

#Create simulated cross-classification data

# Create a sample data set with 3 classes and no covariates,
# and run poLCA to recover the specified parameters.
# Each matrix in the probs list contains one of the manifest variables'
# "true" conditional response probabilities.
probs <- list(matrix(c(0.6,0.1,0.3, 0.6,0.3,0.1, 0.3,0.1,0.6 ),ncol=3,byrow=TRUE), # Y1
              matrix(c(0.2,0.8, 0.7,0.3, 0.3,0.7 ),ncol=2,byrow=TRUE), # Y2
              matrix(c(0.3,0.6,0.1, 0.1,0.3,0.6, 0.3,0.6,0.1 ),ncol=3,byrow=TRUE), # Y3
              matrix(c(0.1,0.1,0.5,0.3, 0.5,0.3,0.1,0.1, 0.3,0.1,0.1,0.5),ncol=4,byrow=TRUE), # Y4
              matrix(c(0.1,0.1,0.8, 0.1,0.8,0.1, 0.8,0.1,0.1 ),ncol=3,byrow=TRUE)) # Y5
simdat <- poLCA.simdata(N=1000,probs,P=c(0.2,0.3,0.5))
f1 <- cbind(Y1,Y2,Y3,Y4,Y5)~1
lc1 <- poLCA(f1,simdat$dat,nclass=3)

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Y1
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.5792 0.0872 0.3336
## class 2:  0.2568 0.1109 0.6323
## class 3:  0.6521 0.2523 0.0956
## 
## $Y2
##            Pr(1)  Pr(2)
## class 1:  0.2417 0.7583
## class 2:  0.3068 0.6932
## class 3:  0.6170 0.3830
## 
## $Y3
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.2393 0.6844 0.0763
## class 2:  0.2805 0.6247 0.0948
## class 3:  0.1229 0.2515 0.6256
## 
## $Y4
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.0851 0.0737 0.5009 0.3403
## class 2:  0.3227 0.0980 0.1265 0.4529
## class 3:  0.4877 0.2975 0.1068 0.1080
## 
## $Y5
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.0000 0.1298 0.8702
## class 2:  0.8435 0.0584 0.0981
## class 3:  0.0554 0.8464 0.0982
## 
## Estimated class population shares 
##  0.1708 0.5026 0.3266 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.203 0.466 0.331 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 1000 
## number of estimated parameters: 32 
## residual degrees of freedom: 183 
## maximum log-likelihood: -4792.99 
##  
## AIC(3): 9649.981
## BIC(3): 9807.029
## G^2(3): 184.554 (Likelihood ratio/deviance statistic) 
## X^2(3): 164.7838 (Chi-square goodness of fit) 
##

table(lc1$predclass,simdat$trueclass)

##    
##       1   2   3
##   1 147   9  47
##   2  30  20 416
##   3  25 284  22

# Create a sample dataset with 2 classes and three covariates.
# Then compare predicted class memberships when the model is
# estimated "correctly" with covariates to when it is estimated
# "incorrectly" without covariates.
simdat2 <- poLCA.simdata(N=1000,ndv=7,niv=3,nclass=2,b=matrix(c(1,-2,1,-1)))
f2a <- cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7)~X1+X2+X3
lc2a <- poLCA(f2a,simdat2$dat,nclass=2)

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Y1
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.7470 0.2479 0.0051
## class 2:  0.3327 0.4315 0.2358
## 
## $Y2
##            Pr(1)  Pr(2)
## class 1:  0.1887 0.8113
## class 2:  0.9062 0.0938
## 
## $Y3
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.3132 0.3417 0.3452
## class 2:  0.5896 0.2122 0.1981
## 
## $Y4
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.4022 0.5658 0.0320
## class 2:  0.4554 0.4423 0.1023
## 
## $Y5
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.4226 0.1658 0.3214 0.0902
## class 2:  0.4125 0.1753 0.3147 0.0975
## 
## $Y6
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.3869 0.3638 0.0427 0.2065
## class 2:  0.3641 0.0369 0.2511 0.3479
## 
## $Y7
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.3084 0.4371 0.2545
## class 2:  0.0204 0.2474 0.7323
## 
## Estimated class population shares 
##  0.3459 0.6541 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.338 0.662 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## 2 / 1 
##             Coefficient  Std. error  t value  Pr(>|t|)
## (Intercept)     1.18728     0.14964    7.934         0
## X1             -2.03651     0.19371  -10.513         0
## X2              1.15074     0.13970    8.237         0
## X3             -1.10637     0.14084   -7.855         0
## ========================================================= 
## number of observations: 1000 
## number of estimated parameters: 34 
## residual degrees of freedom: 966 
## maximum log-likelihood: -6682.125 
##  
## AIC(2): 13432.25
## BIC(2): 13599.11
## X^2(2): 4797.698 (Chi-square goodness of fit) 
##

f2b <- cbind(Y1,Y2,Y3,Y4,Y5,Y6,Y7)~1
lc2b <- poLCA(f2b,simdat2$dat,nclass=2)

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $Y1
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.3466 0.4220 0.2314
## class 2:  0.7259 0.2637 0.0104
## 
## $Y2
##            Pr(1)  Pr(2)
## class 1:  0.9061 0.0939
## class 2:  0.1790 0.8210
## 
## $Y3
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.5952 0.2115 0.1933
## class 2:  0.2985 0.3448 0.3566
## 
## $Y4
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.4590 0.4406 0.1004
## class 2:  0.3945 0.5707 0.0348
## 
## $Y5
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.4030 0.1809 0.3196 0.0965
## class 2:  0.4411 0.1547 0.3120 0.0922
## 
## $Y6
##            Pr(1)  Pr(2)  Pr(3)  Pr(4)
## class 1:  0.3672 0.0374 0.2541 0.3413
## class 2:  0.3813 0.3675 0.0340 0.2173
## 
## $Y7
##            Pr(1)  Pr(2)  Pr(3)
## class 1:  0.0133 0.2537 0.7331
## class 2:  0.3261 0.4276 0.2463
## 
## Estimated class population shares 
##  0.6588 0.3412 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.657 0.343 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 1000 
## number of estimated parameters: 31 
## residual degrees of freedom: 969 
## maximum log-likelihood: -6877.585 
##  
## AIC(2): 13817.17
## BIC(2): 13969.31
## G^2(2): 1510.651 (Likelihood ratio/deviance statistic) 
## X^2(2): 4237.38 (Chi-square goodness of fit) 
##

table(lc2a$predclass,lc2b$predclass)

##    
##       1   2
##   1  27 311
##   2 630  32

#Frequency tables of predicted cell counts from latent class analysis

data(gss82)
f <- cbind(PURPOSE,ACCURACY,UNDERSTA,COOPERAT)~1
gss.lc2 <- poLCA(f,gss82,nclass=2)

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $PURPOSE
##             Good Depends Waste of time
## class 1:  0.8953  0.0579        0.0468
## class 2:  0.2154  0.2066        0.5780
## 
## $ACCURACY
##           Mostly true Not true
## class 1:       0.6367   0.3633
## class 2:       0.0297   0.9703
## 
## $UNDERSTA
##             Good Fair/Poor
## class 1:  0.8327    0.1673
## class 2:  0.7422    0.2578
## 
## $COOPERAT
##           Interested Cooperative Impatient
## class 1:      0.8840      0.1043    0.0117
## class 2:      0.6478      0.2498    0.1024
## 
## Estimated class population shares 
##  0.8077 0.1923 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.8136 0.1864 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 1202 
## number of estimated parameters: 13 
## residual degrees of freedom: 22 
## maximum log-likelihood: -2783.268 
##  
## AIC(2): 5592.536
## BIC(2): 5658.729
## G^2(2): 79.33723 (Likelihood ratio/deviance statistic) 
## X^2(2): 93.25329 (Chi-square goodness of fit) 
##

gss.lc2$predcell

##    PURPOSE ACCURACY UNDERSTA COOPERAT observed expected
## 1        1        1        1        1      419  408.076
## 2        1        1        1        2       35   48.339
## 3        1        1        1        3        2    5.497
## 4        1        1        2        1       71   82.090
## 5        1        1        2        2       25    9.752
## 6        1        1        2        3        5    1.121
## 7        1        2        1        1      270  255.710
## 8        1        2        1        2       25   36.387
## 9        1        2        1        3        4    6.743
## 10       1        2        2        1       42   54.774
## 11       1        2        2        2       16    8.621
## 12       1        2        2        3        5    1.892
## 13       2        1        1        1       23   27.047
## 14       2        1        1        2        4    3.374
## 15       2        1        1        3        1    0.456
## 16       2        1        2        1        6    5.534
## 17       2        1        2        2        2    0.716
## 18       2        2        1        1       43   37.321
## 19       2        2        1        2        9   10.365
## 20       2        2        1        3        2    3.718
## 21       2        2        2        1        9   10.759
## 22       2        2        2        2        3    3.340
## 23       2        2        2        3        2    1.262
## 24       3        1        1        1       26   23.198
## 25       3        1        1        2        3    3.248
## 26       3        1        2        1        1    4.940
## 27       3        1        2        2        2    0.760
## 28       3        2        1        1       85   74.468
## 29       3        2        1        2       23   25.464
## 30       3        2        1        3        6   10.007
## 31       3        2        2        1       13   24.084
## 32       3        2        2        2       12    8.634
## 33       3        2        2        3        8    3.452

poLCA.table(formula=COOPERAT~1,condition=list(PURPOSE=3,ACCURACY=1,UNDERSTA=2),lc=gss.lc2)

##  Interested Cooperative Impatient
##    4.940145    0.760367 0.1613175

poLCA.table(formula=COOPERAT~UNDERSTA,condition=list(PURPOSE=3,ACCURACY=1),lc=gss.lc2)

##                   Good Fair/Poor
## Interested  23.1977341 4.9401448
## Cooperative  3.2481185 0.7603670
## Impatient    0.5830967 0.1613175

poLCA.table(formula=COOPERAT~UNDERSTA,condition=list(),lc=gss.lc2)

##                  Good  Fair/Poor
## Interested  825.81927 182.180734
## Cooperative 127.17641  31.823590
## Impatient    27.00432   7.995676

#Random draws from a multinomial distribution

#Examples
##
## One draw from a three-category multinomial distribution.
##
p1 <- c(0.7,0.2,0.1)
rmulti(p1)

## [1] 1

##
## 10,000 draws from a three-category multinomial distribution.
##
n <- 10000
p2 <- matrix(p1,nrow=n,ncol=length(p1),byrow=TRUE)
rmdraws <- rmulti(p2)
table(rmdraws)/n # should be approximately 0.7, 0.2, 0.1

## rmdraws
##      1      2      3 
## 0.7028 0.2035 0.0937

##
## 10,000 draws from a mixture of three groups of a
## four-category multinomial distribution.
##
group.p <- matrix(c(0.5,0.3,0.2),nrow=n,ncol=3,byrow=TRUE)
group <- rmulti(group.p)
p3 <- t(matrix(NA,nrow=n,ncol=4))
p3[,group==1] <- c(0.7,0.1,0.1,0.1)
p3[,group==2] <- c(0.1,0.7,0.1,0.1)
p3[,group==3] <- c(0.1,0.1,0.1,0.7)
p3 <- t(p3)
rmdraws3 <- rmulti(p3)
table(group,rmdraws3)

##      rmdraws3
## group    1    2    3    4
##     1 3466  504  499  534
##     2  282 2094  284  305
##     3  196  214  181 1441

table(group,rmdraws3)/rowSums(table(group,rmdraws3))

##      rmdraws3
## group          1          2          3          4
##     1 0.69278433 0.10073956 0.09974016 0.10673596
##     2 0.09510961 0.70623946 0.09578415 0.10286678
##     3 0.09645669 0.10531496 0.08907480 0.70915354

#Universalistic vs. particularistic values (sample data)

## Replication of latent class models in Goodman (2002),
## Tables 5b, 5c, and 6.
##
data(values)
f <- cbind(A,B,C,D)~1
M0 <- poLCA(f,values,nclass=1) # log-likelihood: -543.6498

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.2083 0.7917
## 
## $B
##           Pr(1) Pr(2)
## class 1:    0.5   0.5
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.4861 0.5139
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.6898 0.3102
## 
## Estimated class population shares 
##  1 
##  
## Predicted class memberships (by modal posterior prob.) 
##  1 
##  
## ========================================================= 
## Fit for 1 latent classes: 
## ========================================================= 
## number of observations: 216 
## number of estimated parameters: 4 
## residual degrees of freedom: 11 
## maximum log-likelihood: -543.6498 
##  
## AIC(1): 1095.3
## BIC(1): 1108.801
## G^2(1): 81.08423 (Likelihood ratio/deviance statistic) 
## X^2(1): 104.1071 (Chi-square goodness of fit) 
##

M1 <- poLCA(f,values,nclass=2) # log-likelihood: -504.4677

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.0068 0.9932
## class 2:  0.2864 0.7136
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.0602 0.9398
## class 2:  0.6704 0.3296
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.0735 0.9265
## class 2:  0.6460 0.3540
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.2309 0.7691
## class 2:  0.8676 0.1324
## 
## Estimated class population shares 
##  0.2792 0.7208 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.3287 0.6713 
##  
## ========================================================= 
## Fit for 2 latent classes: 
## ========================================================= 
## number of observations: 216 
## number of estimated parameters: 9 
## residual degrees of freedom: 6 
## maximum log-likelihood: -504.4677 
##  
## AIC(2): 1026.935
## BIC(2): 1057.313
## G^2(2): 2.719922 (Likelihood ratio/deviance statistic) 
## X^2(2): 2.719764 (Chi-square goodness of fit) 
##

M2 <- poLCA(f,values,nclass=3,maxiter=8000) # log-likelihood: -503.3011

## Conditional item response (column) probabilities,
##  by outcome variable, for each class (row) 
##  
## $A
##            Pr(1)  Pr(2)
## class 1:  0.0022 0.9978
## class 2:  0.5188 0.4812
## class 3:  0.1557 0.8443
## 
## $B
##            Pr(1)  Pr(2)
## class 1:  0.0204 0.9796
## class 2:  0.9053 0.0947
## class 3:  0.5013 0.4987
## 
## $C
##            Pr(1)  Pr(2)
## class 1:  0.0000 1.0000
## class 2:  0.7310 0.2690
## class 3:  0.5522 0.4478
## 
## $D
##            Pr(1)  Pr(2)
## class 1:  0.0874 0.9126
## class 2:  0.9251 0.0749
## class 3:  0.7983 0.2017
## 
## Estimated class population shares 
##  0.193 0.2266 0.5804 
##  
## Predicted class memberships (by modal posterior prob.) 
##  0.1944 0.1435 0.662 
##  
## ========================================================= 
## Fit for 3 latent classes: 
## ========================================================= 
## number of observations: 216 
## number of estimated parameters: 14 
## residual degrees of freedom: 1 
## maximum log-likelihood: -503.3011 
##  
## AIC(3): 1034.602
## BIC(3): 1081.856
## G^2(3): 0.3868563 (Likelihood ratio/deviance statistic) 
## X^2(3): 0.4225483 (Chi-square goodness of fit) 
##

poLCA_ejemplos.R

carlos

Tue Jun 28 19:33:00 2016