# HLCM vs MCLUST.2 vs MDA.2

experiment to asses the different classes found by each method and evaluate it, using fixed number of components to 2 in MCLUST and MDA method

library(FRESA.CAD)
library(mclust)
library(mlbench)
library(mda)

Sonar Dataset

data(Sonar, package = "mlbench")
Sonar$Class <- 1*(Sonar$Class == "M")

Compare.df2 <- as.data.frame( Sonar$Class)

FRESA HLCM fitting and extraction of classes found

Meta-algorithm that finds lattent classes by EM

  • by getting the classification on every lattent class 2* lattentclass + 1*(testResult >= 0.5) [one per diferent classification for different latent class ]
set.seed(42)

sonarHlcm <- HLCM_EM(Class ~., Sonar,hysteresis = 0.1)
## [++++-][+++-][++++++++]( 45 )[++-][+++++-]( 23 )[+++-][++++++-]( 20 )[+++-][++++++-]( 16 )[+++-][+++--]( 20 )[+++-][++-]( 19 )[+++-][++-]( 5 )[+++-][+++-]( 13 )[++-][+++-]( 14 )[++--][++-+-]( 21 )< 167 , 169 , 100 , 5 >
predict.latentClass <- function(model,data){
  testResult <-  predict(model,data)
  #eliminate the first column because it isnt a class
  prbclass <- attr(testResult,"probabilities")[,-1] 
  latentclass <- apply(prbclass,1,which.max)
  result <- list(classification = 2*latentclass + 1*(testResult >= 0.5),
                 outcome=testResult,
                 latentclass=latentclass)
  
  return(result)
}


prd.Hlcm <- predict.latentClass(sonarHlcm,Sonar)

#2. get the classification per class 2* lattentclass + 1*(testResult >= 0.5) 
# [one per diferent classification for different latent class]
prd.Hlcm.2 <- prd.Hlcm$classification


Compare.df2 <- cbind(Compare.df2,prd.Hlcm.2 )
Compare.df2$`Sonar$Class` <- NULL 

mclustDA fitting and extraction of classes found

Finite Gaussian mixture modelling fitted via EM algorithm for model-based clustering, classification, and density estimation, including Bayesian regularization and dimension reduction.

  • the lattent classes or components are estimated with a model for each label in the target variable
SonarMclustDA <- MclustDA(data =Sonar ,class = Sonar$Class ,G = 2 )

MclustDA.getclass  <- function(MclustDA.obj) {
  models <-MclustDA.obj$models
  print( paste("classes found: ", length(MclustDA.obj$models)) )
  predicted <- c()
  i= 1
  G = 0 #number of components found in previous model 
  for (model in models){
    print( paste("components found in class (",i,"): ",model$G),sep ="" )
    predicted <- c(predicted,model$classification +G)
    G = model$G + G # to differenciate from components with same label but different class
    i = i+1
    }
  return(predicted)
}

prd.mclust <- MclustDA.getclass(SonarMclustDA) 
## [1] "classes found:  2"
## [1] "components found in class ( 1 ):  2"
## [1] "components found in class ( 2 ):  2"
#summary(SonarMclustDA)

Compare.df2 <- cbind(Compare.df2,prd.mclust)

MDA fitting and extraction of classes found

Mixture discriminant analysis.

  • this approach same as the MclustDA generates two classes (one per label in the target variable) and then it creates 3 components by default
sonarMDA <- mda(Class ~ ., Sonar,subclasses = 2)

MDA.getclass  <- function(MDA.obj) {
  print( paste("classes found: ", length(MDA.obj$weights)) )
  predicted <- c()
  i= 1
  G = 0 #number of components found in previous model 
  for (weights in MDA.obj$weights){
    print( paste("components found in class (",i,"): ",dim(weights)[2] ))
    print( paste("observations found in class (",i,"): ",dim(weights)[1] ))
    component <-  apply(weights,1,which.max) # get the component which has max prob
    predicted <- c(predicted,component+G)
    G = dim(weights)[2] + G # to differenciate from components with same label but different class
    i = i+1 
  }
  return(predicted)
}

#Number of subclasses per class, default is 3. Can be a vector with a number for each class.
prd.MDA <- MDA.getclass(sonarMDA) # abbreviations are allowed
## [1] "classes found:  2"
## [1] "components found in class ( 1 ):  2"
## [1] "observations found in class ( 1 ):  97"
## [1] "components found in class ( 2 ):  2"
## [1] "observations found in class ( 2 ):  111"
table(prd.MDA)
## prd.MDA
##  1  2  3  4 
## 62 35 39 72
Compare.df2 <- cbind(Compare.df2, prd.MDA)
hlcm.vs.mclust2 <- jaccardMatrix(Compare.df2[,1], Compare.df2[,2])
hlcm.vs.mda2 <- jaccardMatrix(Compare.df2[,1], Compare.df2[,3])
mclust2.vs.mda2 <- jaccardMatrix(Compare.df2[,2], Compare.df2[,3])


hlcm.vs.mclust2$balancedMeanJaccard
## [1] 0.3868928
hlcm.vs.mda2$balancedMeanJaccard
## [1] 0.445147
mclust2.vs.mda2$balancedMeanJaccard
## [1] 0.6364539