USPSPS Bayesian Latent Class Analysis

Run LCA to determine optimal number of classes

#run series of models with between 2-10 classes, create table of model fits, and print out the model with the lowest BIC
#ctocc.bLCA.gibbs.10 <- blca.gibbs(ctoccbayesdat.n, 10, relabel = TRUE, burn.in = 1000)

min_bicm <- 100000
for (i in 2:10) {
  bayeslc <- blca.gibbs(ctoccbayesdat.n, i, relabel = TRUE, burn.in = 1000)
  modname <- paste0("bayeslc.",i)
  assign(modname, bayeslc)
  if(bayeslc$BICM < min_bicm) {
    min_bicm <- bayeslc$BICM
    LCA_best_model <- bayeslc
  }
  if (i == 2) {
     bayeslc.fits <- cbind("2 classes", bayeslc$BICM, bayeslc$AICM, bayeslc$DIC, bayeslc$logpost)
  }
  if (i > 2) {
    nclasses <- paste(i,"classes")
    bayeslc.fitsnew <- cbind(nclasses, bayeslc$BICM, bayeslc$AICM, bayeslc$DIC, bayeslc$logpost)
    bayeslc.fits <- rbind(bayeslc.fits, bayeslc.fitsnew)
  }
}

## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.
## Initialising sampler...starting burn-in.
## Burn-in completed...
## 1000 of 5000 samples completed...
## 2000 of 5000 samples completed...
## 3000 of 5000 samples completed...
## 4000 of 5000 samples completed...
## 5000 of 5000 samples completed...
## Sampling run completed.

colnames(bayeslc.fits) <- c("N Classes", "BICM", "AICM", "DIC", "LLPost")
bayeslc.fits

##       N Classes    BICM                AICM                DIC                
##  [1,] "2 classes"  "-10959.2304287112" "-10798.0760154067" "-10795.400964032" 
##  [2,] "3 classes"  "-10754.6654871407" "-10522.2219854563" "-10519.2655134368"
##  [3,] "4 classes"  "-10754.1929139989" "-10388.7267292401" "-10368.7387327939"
##  [4,] "5 classes"  "-10835.9407208804" "-10386.7564317865" "-10352.6828411943"
##  [5,] "6 classes"  "-11062.6848869123" "-10410.3734961344" "-10322.5036195612"
##  [6,] "7 classes"  "-11224.6483510009" "-10426.6719668361" "-10310.602571426" 
##  [7,] "8 classes"  "-11361.5373797497" "-10441.4458375747" "-10291.9907771327"
##  [8,] "9 classes"  "-11485.8303755728" "-10457.6574872074" "-10282.6600162105"
##  [9,] "10 classes" "-11582.1655880301" "-10472.3849198851" "-10278.6650916217"
##       LLPost             
##  [1,] "-5367.49178107868"
##  [2,] "-5215.15885200041"
##  [3,] "-5129.7835472609" 
##  [4,] "-5118.75841549243"
##  [5,] "-5116.01676068147"
##  [6,] "-5108.542637946"  
##  [7,] "-5107.70466062511"
##  [8,] "-5106.52756173291"
##  [9,] "-5106.60029086002"

#export table of fits
#write.csv(bayeslc.fits, "BayesLCAmodelfit.csv", row.names=F)

#merge results from best fitting model with dataset
#ctocpost <- cbind(ctoccsv.yn.comp.class, ctocc.bLCA.gibbs.4$Z)
#colnames(ctocpost)[1370:1373] <- c("BayesC1", "BayesC2", "BayesC3", "BayesC4")
#ctocpost <- cbind(ctocpost, "BayesClass" = apply(ctocc.bLCA.gibbs.4$Z,1, FUN=which.max))

#save estimates
#write.csv(ctocpost, "ctoccLCAwithBayesian2.csv", row.names=F)

DY-BOCS Regression Models on 4 Classes

ctocclusterdat <- read.csv("ctoccLCAwithBayesian2.csv")

ctocclusterdat <- ctocclusterdat %>% 
  rename(Aggression = TOTALAGR, Taboo = TOTALSEX, Symmetry = TOTALSIM, Hoarding = TOTALCOL, Washing = TOTALCON, Miscellaneous = TOTALSIN)

# Class 1
class1mod <- lm(scale(BayesC1) ~ scale(Aggression) + scale(Taboo) + scale(Symmetry) + scale(Hoarding) + scale(Washing) + scale(Miscellaneous), data=ctocclusterdat)
tab_model(class1mod, show.std = T, show.ci = .90, show.est = F, show.stat = T)

	scale(BayesC1)
Predictors	std. Beta	standardized CI	Statistic	p
(Intercept)	0.00	-0.05 – 0.05	0.00	0.935
Aggression	0.09	0.03 – 0.14	2.56	0.011
Taboo	0.01	-0.05 – 0.06	0.24	0.808
Symmetry	-0.32	-0.37 – -0.26	-9.13	<0.001
Hoarding	-0.03	-0.09 – 0.02	-1.07	0.286
Washing	-0.04	-0.09 – 0.02	-1.08	0.280
Miscellaneous	-0.06	-0.12 – -0.01	-1.86	0.063
Observations	991
R² / R² adjusted	0.124 / 0.119

# Class 2
class2mod <- lm(scale(BayesC2) ~ scale(Aggression) + scale(Taboo) + scale(Symmetry) + scale(Hoarding) + scale(Washing) + scale(Miscellaneous), data=ctocclusterdat)
tab_model(class2mod, show.std = T, show.ci = .90, show.est = F, show.stat = T)

	scale(BayesC2)
Predictors	std. Beta	standardized CI	Statistic	p
(Intercept)	-0.00	-0.05 – 0.05	-0.00	0.967
Aggression	-0.10	-0.16 – -0.04	-2.81	0.005
Taboo	-0.02	-0.08 – 0.03	-0.71	0.481
Symmetry	-0.00	-0.06 – 0.06	-0.03	0.978
Hoarding	-0.03	-0.09 – 0.02	-1.00	0.319
Washing	-0.09	-0.15 – -0.04	-2.74	0.006
Miscellaneous	0.11	0.05 – 0.17	3.16	0.002
Observations	991
R² / R² adjusted	0.026 / 0.020

# Class 3
class3mod <- lm(scale(BayesC3) ~ scale(Aggression) + scale(Taboo) + scale(Symmetry) + scale(Hoarding) + scale(Washing) + scale(Miscellaneous), data=ctocclusterdat)
tab_model(class3mod, show.std = T, show.ci = .90, show.est = F, show.stat = T)

	scale(BayesC3)
Predictors	std. Beta	standardized CI	Statistic	p
(Intercept)	-0.00	-0.05 – 0.05	-0.00	0.965
Aggression	-0.06	-0.12 – -0.00	-1.78	0.075
Taboo	-0.05	-0.11 – 0.00	-1.52	0.128
Symmetry	0.27	0.21 – 0.33	7.57	<0.001
Hoarding	-0.03	-0.08 – 0.03	-0.77	0.440
Washing	0.11	0.06 – 0.17	3.42	0.001
Miscellaneous	-0.02	-0.08 – 0.04	-0.63	0.527
Observations	991
R² / R² adjusted	0.089 / 0.084

# Class 4
class4mod <- lm(scale(BayesC4) ~ scale(Aggression) + scale(Taboo) + scale(Symmetry) + scale(Hoarding) + scale(Washing) + scale(Miscellaneous), data=ctocclusterdat)
tab_model(class4mod, show.std = T, show.ci = .90, show.est = F, show.stat = T)

	scale(BayesC4)
Predictors	std. Beta	standardized CI	Statistic	p
(Intercept)	-0.00	-0.05 – 0.05	-0.00	0.982
Aggression	0.05	-0.01 – 0.11	1.47	0.142
Taboo	0.08	0.02 – 0.13	2.37	0.018
Symmetry	0.16	0.11 – 0.22	4.64	<0.001
Hoarding	0.12	0.07 – 0.18	3.80	<0.001
Washing	0.01	-0.04 – 0.07	0.42	0.671
Miscellaneous	0.00	-0.05 – 0.06	0.13	0.893
Observations	991
R² / R² adjusted	0.088 / 0.083

USPSPS Bayesian Latent Class Analysis

Caitlin Stamatis

4/29/2020

Run LCA to determine optimal number of classes

DY-BOCS Regression Models on 4 Classes