# HLCM vs MCLUST vs MDA
experiment to asses the different classes found by each method discusing about the individual observations predicted by each
data(Sonar, package = "mlbench")
Sonar$Class <- 1*(Sonar$Class == "M")
Compare.df <- as.data.frame( Sonar$Class)
set.seed(42)
sonarHlcm <- HLCM_EM(Class ~., Sonar,hysteresis = 0.1)
## [+++++-][++--][+++++++]( 50 )[+++-][++++-]( 22 )[++-][+++++-]( 11 )[+++--][++++++---]( 14 )[++-][++++-]( 10 )[++-][++-+--]( 16 )[++-][+++-]( 15 )[++-][+++-]( 14 )[+--][+++-]( 22 )[+-][+++++-]( 4 )< 168 , 160 , 106 , 5 >
predict.latentClass <- function(model,data){
testResult <- predict(model,data)
#eliminate the first column because it isnt a class
prbclass <- attr(testResult,"probabilities")[,-1]
latentclass <- apply(prbclass,1,which.max)
result <- list(classification = 2*latentclass + 1*(testResult >= 0.5),
outcome=testResult,
latentclass=latentclass)
return(result)
}
prd.Hlcm <- predict.latentClass(sonarHlcm,Sonar)
#2. get the classification per class 2* lattentclass + 1*(testResult >= 0.5)
# [one per diferent classification for different latent class]
prd.Hlcm.2 <- prd.Hlcm$classification
Compare.df <- cbind(Compare.df,prd.Hlcm.2 )
Compare.df$`Sonar$Class` <- NULL
SonarMclustDA <- MclustDA(data =Sonar ,class = Sonar$Class)
MclustDA.getclass <- function(MclustDA.obj) {
models <-MclustDA.obj$models
print( paste("classes found: ", length(MclustDA.obj$models)) )
predicted <- c()
i= 1
G = 0 #number of components found in previous model
for (model in models){
print( paste("components found in class (",i,"): ",model$G),sep ="" )
predicted <- c(predicted,model$classification +G)
G = model$G + G # to differenciate from components with same label but different class
i = i+1
}
return(predicted)
}
prd.mclust <- MclustDA.getclass(SonarMclustDA)
## [1] "classes found: 2"
## [1] "components found in class ( 1 ): 5"
## [1] "components found in class ( 2 ): 1"
#summary(SonarMclustDA)
Compare.df <- cbind(Compare.df,prd.mclust)
sonarMDA <- mda(Class ~ ., Sonar)
MDA.getclass <- function(MDA.obj) {
print( paste("classes found: ", length(MDA.obj$weights)) )
predicted <- c()
i= 1
G = 0 #number of components found in previous model
for (weights in MDA.obj$weights){
print( paste("components found in class (",i,"): ",dim(weights)[2] ))
print( paste("observations found in class (",i,"): ",dim(weights)[1] ))
component <- apply(weights,1,which.max) # get the component which has max prob
predicted <- c(predicted,component+G)
G = dim(weights)[2] + G # to differenciate from components with same label but different class
i = i+1
}
return(predicted)
}
#Number of subclasses per class, default is 3. Can be a vector with a number for each class.
prd.MDA <- MDA.getclass(sonarMDA) # abbreviations are allowed
## [1] "classes found: 2"
## [1] "components found in class ( 1 ): 3"
## [1] "observations found in class ( 1 ): 97"
## [1] "components found in class ( 2 ): 3"
## [1] "observations found in class ( 2 ): 111"
Compare.df <- cbind(Compare.df, prd.MDA)
hlcm.vs.mclust <- jaccardMatrix(Compare.df[,1], Compare.df[,2])
hlcm.vs.mda <- jaccardMatrix(Compare.df[,1], Compare.df[,3])
mclust.vs.mda <- jaccardMatrix(Compare.df[,2], Compare.df[,3])
hlcm.vs.mclust$balancedMeanJaccard
## [1] 0.3264524
hlcm.vs.mda$balancedMeanJaccard
## [1] 0.3704139
mclust.vs.mda$balancedMeanJaccard
## [1] 0.384201