library(dplyr)
library(plyr)
library(poLCA)
library(plotly)
library(ggplot2)
# Load the client export; the first column of the CSV is discarded on read.
mydata <- read.csv("ClientAllData.csv")[-1]

# Keep the client id, the demographic fields and the *_class indicators.
lca_subset <- mydata[c(
  "StudyClientId", "age_group", "Gender", "Race", "MaritalStatus", "Religion",
  "Aod1_class", "Aod2_class", "Disruption_class", "LegalNonConform_class",
  "LifeFunctionality_class", "Suicide_class", "Homicide_class", "Trauma_class"
)]

# Recode placeholder / unusable category codes as missing.
# Gender keeps only "M" and "F"; the other fields blank out listed codes.
is.na(lca_subset$Gender) <- !(lca_subset$Gender %in% c("M", "F"))
is.na(lca_subset$Race) <- lca_subset$Race %in% c("Not on file", "Undisclosed")
is.na(lca_subset$MaritalStatus) <-
  lca_subset$MaritalStatus %in% c("", "Not Specified")
is.na(lca_subset$Religion) <- lca_subset$Religion %in% c("", "Unknown")

# Complete cases only: drop every row with a missing value in any column.
lca_subset <- tidyr::drop_na(lca_subset)

# Convert everything except the id column (column 1) to a factor.
lca_subset[-1] <- lapply(lca_subset[-1], function(column) factor(column))
# Variables inside cbind() are the latent class classification (manifest) variables.
# Variables to the right of `~` are covariates: they are not used as classification
# variables, but poLCA uses them to predict latent class membership.
# Finally, run the LCA for a range of class counts.
# LCA classification variables: ASUS / Risk / Trauma
# Covariates: demographics (no location)
# Chi-square + post-hoc tests: latent class vs. outcome (Incident Score -> binary)
# Model formula: the cbind() terms are the manifest indicators, the right-hand
# side lists the covariates.
f <- cbind(
  Aod1_class, Aod2_class, Disruption_class, LegalNonConform_class,
  LifeFunctionality_class, Suicide_class, Homicide_class, Trauma_class
) ~ age_group + Race + MaritalStatus + Religion

# Sanity check: number of missing values in each column of the analysis set.
vapply(lca_subset, function(column) sum(is.na(column)), integer(1))
# Latent class analysis with 1, 2 and 3 classes.
# Each model is estimated 15 times (nrep) from different random starting
# values. Fix the RNG seed first so the fitted models, and the arbitrary
# numbering of the classes, are the same on every run of the script.
set.seed(1234)
lCA1 <- poLCA(f, lca_subset, nclass = 1, nrep = 15)
# graphs = TRUE also plots the estimated class-conditional probabilities.
lCA2 <- poLCA(f, lca_subset, nclass = 2, nrep = 15, graphs = TRUE)
lCA3 <- poLCA(f, lca_subset, nclass = 3, nrep = 15, graphs = TRUE)
# Calculate entropy (3-class model) - values closer to 1.0 indicate greater
# separation of the classes.
#
# entropy(p): Shannon entropy (natural log) of a probability vector `p`,
# i.e. sum(-p * log(p)).
# Entries that are exactly 0 contribute 0 (the limit of p * log(p) as p -> 0).
# Without this, 0 * log(0) evaluates to NaN in R and would turn the whole sum,
# and any mean taken over such sums, into NaN.
entropy <- function(p) {
  terms <- -p * log(p)
  # which() skips NA comparisons, so missing inputs still propagate as NA.
  terms[which(p == 0)] <- 0
  sum(terms)
}
# Relative entropy of the 3-class model: the share of the class-proportion
# entropy that is removed, on average, by each row's posterior probabilities.
error_prior <- entropy(lCA3[["P"]]) # entropy of the class proportions
error_post <- mean(
  vapply(
    seq_len(nrow(lCA3[["posterior"]])),
    function(i) entropy(lCA3[["posterior"]][i, ]), # one row of the posterior
    numeric(1)
  )
)
LCA3_entropy <- (error_prior - error_post) / error_prior
LCA3_entropy
# Predicted class membership is stored in lCA3$predclass; preview the first 30
# values (head() does not pad with NA when there are fewer than 30 rows).
head(lCA3$predclass, 30)

# Add the predicted class to the analysis set so it can be used as a predictor.
lca_subset$class <- lCA3$predclass

# Attach the class to the full client data, matching on the client id.
# Columns are selected by name rather than position, so the join keeps working
# if columns are added to or removed from lca_subset. inner_join keeps only
# clients that are present in lca_subset.
data <- mydata %>%
  inner_join(lca_subset[, c("StudyClientId", "class")], by = "StudyClientId")

# Export the result; write.csv writes row names as the first column by default.
write.csv(data, "client_class.csv")