### LCA.R
### purpose: read in SAMHSA 2015 data and run LCA on it
### with different number of classes,and (2) with a covariate
###
#poLCA(formula, data, nclass = 2, maxiter = 1000, graphs = FALSE, tol = 1e-10, na.rm = TRUE,
# probs.start = NULL, nrep = 1, verbose = TRUE, calc.se = TRUE)
###
#slide10:
library(poLCA)
## Warning: package 'poLCA' was built under R version 4.1.3
## Loading required package: scatterplot3d
## Warning: package 'scatterplot3d' was built under R version 4.1.3
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 4.1.3
samhsa2015 <- read.csv("C:/Users/u6032404/Downloads/samhsa_2015F.csv")
vars <- c("mhintake", "mhdiageval", "mhreferral", "treatmt","adminserv")
for (i in 1:length(vars)){
samhsa2015[,vars[i]] <- samhsa2015[,vars[i]]+1
}
f1 <- as.formula(cbind(mhintake, mhdiageval, mhreferral, treatmt, adminserv)~1)
LCA2 <- poLCA(f1, data=samhsa2015, nclass=2)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.6644 0.3356
## class 2: 0.0261 0.9739
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.6249 0.3751
## class 2: 0.0360 0.9640
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.7973 0.2027
## class 2: 0.1090 0.8910
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.7877 0.2123
## class 2: 0.4031 0.5969
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.7240 0.2760
## class 2: 0.2926 0.7074
##
## Estimated class population shares
## 0.1425 0.8575
##
## Predicted class memberships (by modal posterior prob.)
## 0.1249 0.8751
##
## =========================================================
## Fit for 2 latent classes:
## =========================================================
## number of observations: 12671
## number of estimated parameters: 11
## residual degrees of freedom: 20
## maximum log-likelihood: -29740.22
##
## AIC(2): 59502.44
## BIC(2): 59584.36
## G^2(2): 557.3724 (Likelihood ratio/deviance statistic)
## X^2(2): 582.4467 (Chi-square goodness of fit)
##
#slide13:
plot(LCA2)

#slide14:
LCA3 <- poLCA(f1, data=samhsa2015, nclass=3)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.0087 0.9913
## class 2: 0.8097 0.1903
## class 3: 0.0573 0.9427
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.0215 0.9785
## class 2: 0.7553 0.2447
## class 3: 0.0644 0.9356
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.0012 0.9988
## class 2: 0.8374 0.1626
## class 3: 0.2354 0.7646
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.2458 0.7542
## class 2: 0.7955 0.2045
## class 3: 0.5514 0.4486
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.1036 0.8964
## class 2: 0.7304 0.2696
## class 3: 0.4691 0.5309
##
## Estimated class population shares
## 0.3895 0.1045 0.506
##
## Predicted class memberships (by modal posterior prob.)
## 0.337 0.1048 0.5582
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 12671
## number of estimated parameters: 17
## residual degrees of freedom: 14
## maximum log-likelihood: -29542.74
##
## AIC(3): 59119.48
## BIC(3): 59246.08
## G^2(3): 162.4093 (Likelihood ratio/deviance statistic)
## X^2(3): 180.7466 (Chi-square goodness of fit)
##
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND
##
#slide15:
LCA3 <- poLCA(f1, data=samhsa2015, nclass=3, maxiter=3000)
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.0084 0.9916
## class 2: 0.0571 0.9429
## class 3: 0.8093 0.1907
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.0213 0.9787
## class 2: 0.0640 0.9360
## class 3: 0.7553 0.2447
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.0000 1.0000
## class 2: 0.2340 0.7660
## class 3: 0.8374 0.1626
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.2441 0.7559
## class 2: 0.5497 0.4503
## class 3: 0.7957 0.2043
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.1014 0.8986
## class 2: 0.4672 0.5328
## class 3: 0.7306 0.2694
##
## Estimated class population shares
## 0.3846 0.5108 0.1046
##
## Predicted class memberships (by modal posterior prob.)
## 0.337 0.5582 0.1048
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 12671
## number of estimated parameters: 17
## residual degrees of freedom: 14
## maximum log-likelihood: -29542.71
##
## AIC(3): 59119.43
## BIC(3): 59246.03
## G^2(3): 162.3576 (Likelihood ratio/deviance statistic)
## X^2(3): 180.5806 (Chi-square goodness of fit)
##
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND
##
LCA3 <- poLCA(f1, data=samhsa2015, nclass=3, nrep=5)
## Model 1: llik = -29542.73 ... best llik = -29542.73
## Model 2: llik = -29687.93 ... best llik = -29542.73
## Model 3: llik = -29542.74 ... best llik = -29542.73
## Model 4: llik = -29542.74 ... best llik = -29542.73
## Model 5: llik = -29542.72 ... best llik = -29542.72
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.0086 0.9914
## class 2: 0.8095 0.1905
## class 3: 0.0572 0.9428
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.0213 0.9787
## class 2: 0.7553 0.2447
## class 3: 0.0642 0.9358
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.0005 0.9995
## class 2: 0.8374 0.1626
## class 3: 0.2346 0.7654
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.2448 0.7552
## class 2: 0.7956 0.2044
## class 3: 0.5504 0.4496
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.1023 0.8977
## class 2: 0.7305 0.2695
## class 3: 0.4680 0.5320
##
## Estimated class population shares
## 0.3866 0.1045 0.5088
##
## Predicted class memberships (by modal posterior prob.)
## 0.337 0.1048 0.5582
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 12671
## number of estimated parameters: 17
## residual degrees of freedom: 14
## maximum log-likelihood: -29542.72
##
## AIC(3): 59119.45
## BIC(3): 59246.05
## G^2(3): 162.3771 (Likelihood ratio/deviance statistic)
## X^2(3): 180.6466 (Chi-square goodness of fit)
##
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND
##
LCA3 <- poLCA(f1, data=samhsa2015, nclass=3, maxiter=3000, nrep=5)
## Model 1: llik = -29542.71 ... best llik = -29542.71
## Model 2: llik = -29542.71 ... best llik = -29542.71
## Model 3: llik = -29542.71 ... best llik = -29542.71
## Model 4: llik = -29542.71 ... best llik = -29542.71
## Model 5: llik = -29542.71 ... best llik = -29542.71
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.8093 0.1907
## class 2: 0.0571 0.9429
## class 3: 0.0084 0.9916
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.7553 0.2447
## class 2: 0.0640 0.9360
## class 3: 0.0213 0.9787
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.8374 0.1626
## class 2: 0.2340 0.7660
## class 3: 0.0000 1.0000
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.7957 0.2043
## class 2: 0.5497 0.4503
## class 3: 0.2441 0.7559
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.7306 0.2694
## class 2: 0.4672 0.5328
## class 3: 0.1013 0.8987
##
## Estimated class population shares
## 0.1046 0.5109 0.3846
##
## Predicted class memberships (by modal posterior prob.)
## 0.1048 0.5582 0.337
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## number of observations: 12671
## number of estimated parameters: 17
## residual degrees of freedom: 14
## maximum log-likelihood: -29542.71
##
## AIC(3): 59119.43
## BIC(3): 59246.03
## G^2(3): 162.3576 (Likelihood ratio/deviance statistic)
## X^2(3): 180.5808 (Chi-square goodness of fit)
##
LCA4 <- poLCA(f1, data=samhsa2015, nclass=4, maxiter=3000, nrep=5)
## Model 1: llik = -29470.63 ... best llik = -29470.63
## Model 2: llik = -29470.63 ... best llik = -29470.63
## Model 3: llik = -29470.63 ... best llik = -29470.63
## Model 4: llik = -29470.63 ... best llik = -29470.63
## Model 5: llik = -29471.05 ... best llik = -29470.63
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.7393 0.2607
## class 2: 0.0261 0.9739
## class 3: 0.8260 0.1740
## class 4: 0.0070 0.9930
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.3948 0.6052
## class 2: 0.0683 0.9317
## class 3: 0.8256 0.1744
## class 4: 0.0229 0.9771
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.5311 0.4689
## class 2: 0.2508 0.7492
## class 3: 0.9115 0.0885
## class 4: 0.0265 0.9735
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.5752 0.4248
## class 2: 0.5873 0.4127
## class 3: 0.8607 0.1393
## class 4: 0.2784 0.7216
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.2738 0.7262
## class 2: 0.5488 0.4512
## class 3: 0.9175 0.0825
## class 4: 0.1232 0.8768
##
## Estimated class population shares
## 0.0615 0.3895 0.0703 0.4787
##
## Predicted class memberships (by modal posterior prob.)
## 0.05 0.3521 0.068 0.53
##
## =========================================================
## Fit for 4 latent classes:
## =========================================================
## number of observations: 12671
## number of estimated parameters: 23
## residual degrees of freedom: 8
## maximum log-likelihood: -29470.63
##
## AIC(4): 58987.27
## BIC(4): 59158.55
## G^2(4): 18.19505 (Likelihood ratio/deviance statistic)
## X^2(4): 18.11012 (Chi-square goodness of fit)
##
#LCA4 <- poLCA(f1, data=samhsa2015, nclass=4, maxiter=5000, nrep=5)
LCA5 <- poLCA(f1, data=samhsa2015, nclass=5, maxiter=5000, nrep=10)
## Model 1: llik = -29463.4 ... best llik = -29463.4
## Model 2: llik = -29464.87 ... best llik = -29463.4
## Model 3: llik = -29462.96 ... best llik = -29462.96
## Model 4: llik = -29463.34 ... best llik = -29462.96
## Model 5: llik = -29464.18 ... best llik = -29462.96
## Model 6: llik = -29463.35 ... best llik = -29462.96
## Model 7: llik = -29463.14 ... best llik = -29462.96
## Model 8: llik = -29463 ... best llik = -29462.96
## Model 9: llik = -29463.72 ... best llik = -29462.96
## Model 10: llik = -29467.61 ... best llik = -29462.96
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.0315 0.9685
## class 2: 0.1269 0.8731
## class 3: 0.8232 0.1768
## class 4: 0.0000 1.0000
## class 5: 0.7709 0.2291
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.0767 0.9233
## class 2: 0.0265 0.9735
## class 3: 0.8009 0.1991
## class 4: 0.0246 0.9754
## class 5: 0.4826 0.5174
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.2556 0.7444
## class 2: 0.3482 0.6518
## class 3: 0.8874 0.1126
## class 4: 0.0002 0.9998
## class 5: 0.5509 0.4491
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.7013 0.2987
## class 2: 0.1069 0.8931
## class 3: 0.8405 0.1595
## class 4: 0.2740 0.7260
## class 5: 0.6540 0.3460
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.5283 0.4717
## class 2: 0.4144 0.5856
## class 3: 0.9536 0.0464
## class 4: 0.1446 0.8554
## class 5: 0.0724 0.9276
##
## Estimated class population shares
## 0.3292 0.0935 0.0754 0.4594 0.0425
##
## Predicted class memberships (by modal posterior prob.)
## 0.2173 0.0378 0.0762 0.6309 0.0379
##
## =========================================================
## Fit for 5 latent classes:
## =========================================================
## number of observations: 12671
## number of estimated parameters: 29
## residual degrees of freedom: 2
## maximum log-likelihood: -29462.96
##
## AIC(5): 58983.91
## BIC(5): 59199.88
## G^2(5): 2.843112 (Likelihood ratio/deviance statistic)
## X^2(5): 2.913671 (Chi-square goodness of fit)
##
## ALERT: iterations finished, MAXIMUM LIKELIHOOD NOT FOUND
##
### with covariate - 'offer pay assistance'
#slide19:
f2<- as.formula(cbind(mhintake, mhdiageval, mhreferral, treatmt, adminserv)~payasst)
#set.seed(4589)
LCA3c <- poLCA(f2, data=samhsa2015, nclass=3, maxiter=5000, nrep=5)
## Model 1: llik = -28726.57 ... best llik = -28726.57
## Model 2: llik = -28726.57 ... best llik = -28726.57
## Model 3: llik = -30321.84 ... best llik = -28726.57
## Model 4: llik = -28726.57 ... best llik = -28726.57
## Model 5: llik = -30321.84 ... best llik = -28726.57
## Conditional item response (column) probabilities,
## by outcome variable, for each class (row)
##
## $mhintake
## Pr(1) Pr(2)
## class 1: 0.0851 0.9149
## class 2: 0.0090 0.9910
## class 3: 0.8347 0.1653
##
## $mhdiageval
## Pr(1) Pr(2)
## class 1: 0.0826 0.9174
## class 2: 0.0231 0.9769
## class 3: 0.7964 0.2036
##
## $mhreferral
## Pr(1) Pr(2)
## class 1: 0.3216 0.6784
## class 2: 0.0006 0.9994
## class 3: 0.8442 0.1558
##
## $treatmt
## Pr(1) Pr(2)
## class 1: 0.5724 0.4276
## class 2: 0.3141 0.6859
## class 3: 0.8076 0.1924
##
## $adminserv
## Pr(1) Pr(2)
## class 1: 0.4804 0.5196
## class 2: 0.1735 0.8265
## class 3: 0.7491 0.2509
##
## Estimated class population shares
## 0.402 0.501 0.0969
##
## Predicted class memberships (by modal posterior prob.)
## 0.3312 0.5798 0.0889
##
## =========================================================
## Fit for 3 latent classes:
## =========================================================
## 2 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) -0.11932 0.19466 -0.613 0.551
## payasst 0.66159 0.07089 9.332 0.000
## =========================================================
## 3 / 1
## Coefficient Std. error t value Pr(>|t|)
## (Intercept) -1.40758 0.09212 -15.280 0.000
## payasst -0.03516 0.09742 -0.361 0.724
## =========================================================
## number of observations: 12278
## number of estimated parameters: 19
## residual degrees of freedom: 12
## maximum log-likelihood: -28726.57
##
## AIC(3): 57491.14
## BIC(3): 57632.03
## X^2(3): 193.2729 (Chi-square goodness of fit)
##
## ALERT: estimation algorithm automatically restarted with new initial values
##
#predicted class membership is in:
LCA3$predclass[1:30]
## [1] 2 2 3 3 1 2 2 2 2 3 3 3 2 2 1 2 3 3 2 2 2 3 2 2 3 2 3 3 2 2
#could be used as another variable (part of the data):
samhsa2015$LCAf5 <- LCA3$predclass