TP : Clustering et modèles de mélange

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

library(R.matlab)

## R.matlab v3.6.2 (2018-09-26) successfully loaded. See ?R.matlab for help.

## 
## Attachement du package : 'R.matlab'

## Les objets suivants sont masqués depuis 'package:base':
## 
##     getOption, isOpen

library(Rmixmod)

## Le chargement a nécessité le package : Rcpp

## Rmixmod v. 2.1.6 / URI: www.mixmod.org

library(mclust)

## Package 'mclust' version 5.4.9
## Type 'citation("mclust")' for citing this R package in publications.

library(NbClust)
library(FactoMineR)
library(lle)

## Le chargement a nécessité le package : scatterplot3d

## Le chargement a nécessité le package : MASS

## Le chargement a nécessité le package : snowfall

## Le chargement a nécessité le package : snow

library(cluster)
library(aricode)
library(Rtsne)
library(caret)

## Le chargement a nécessité le package : ggplot2

## Le chargement a nécessité le package : lattice

# Load the Optdigits dataset from the MATLAB file. Only the `X` (features)
# and `y` (labels) entries of the list returned by readMat() are used.
op <- readMat("DATA_MATLAB - Projet-master-MLDS/Optdigits.mat")

# Build the working data frame: the feature columns followed by the label
# stored as a factor column named `class`. The label vector gets its own name
# (not `class`) so that base::class() is never masked, and the intermediate
# assignments to `optdigits` that were overwritten before use are dropped.
digit_class <- as.factor(as.vector(op$y))
optdigits <- data.frame(op$X, class = digit_class)

rm(digit_class)
dim(optdigits)

## [1] 5620   65

table(optdigits$class)

## 
##   1   2   3   4   5   6   7   8   9  10 
## 571 557 572 568 558 558 566 554 562 554

2 ACP

L’ACP sur le jeu de données optdigits ne permet pas une bonne séparation des classes. Il ne faut pas oublier que l’on demande ici de réduire la dimension de 64 à 2.

# PCA keeping two components; column 65 (the class factor) is declared as a
# supplementary qualitative variable.
pca.optdigits <- PCA(
  optdigits,
  ncp = 2,
  quali.sup = 65
)

# Plot the PCA result with the points coloured by column 65.
plot(pca.optdigits, habillage = 65)

## 3 KMeans et CAH avec différents critères d’agrégation

# K-means through NbClust on the 64 feature columns (column 65 is the class
# label), using the silhouette index with at most 10 clusters.
# NOTE(review): k-means starts from random centers, so the partition (and the
# scores echoed below) may vary between runs unless set.seed() is called first.
km.optdigits <- NbClust(
  optdigits[, -65],
  distance = "euclidean",
  method = "kmeans",
  index = "silhouette",
  max.nc = 10
)

# table(Best.partition) counts the observations assigned to each cluster, so
# the bars are cluster sizes (the labels previously described a vote count
# over criteria, which is not what is plotted here).
barplot(table(km.optdigits$Best.partition),
        xlab = "Cluster", ylab = "Number of observations",
        main = "Cluster sizes (k-means)")

# Individuals on the first two principal components, coloured by cluster.
plot(pca.optdigits$ind$coord, col = km.optdigits$Best.partition,
     main = "KMeans sur optdigits")

# Contingency table: clusters in rows, true classes in columns.
table(km.optdigits$Best.partition, optdigits$class)

##     
##        1   2   3   4   5   6   7   8   9  10
##   1   10  29 509   0 114   0   0  22 406   0
##   2   40 480   5   0   0   0   0   2   0   0
##   3    0   0   0 484   1   1   1   0   1   4
##   4  346   3   3   8   1   6   3  52   1   0
##   5    1   0   5   4 434   0   3   6   7   0
##   6    2   6  17  36   0   0 545   1  38   0
##   7    0  35  30   9   0   1   6 454   5   0
##   8    0   1   0   0   0   1   0   0   0 549
##   9  168   2   3  23   5   0   8  12 104   0
##   10   4   1   0   4   3 549   0   5   0   1

# Agreement between the partition and the true classes.
NMI(km.optdigits$Best.partition, optdigits$class)

## [1] 0.748943

ARI(km.optdigits$Best.partition, optdigits$class)

## [1] 0.6718752

# Hierarchical clustering (Ward linkage) through NbClust on the 64 feature
# columns, using the silhouette index with at most 10 clusters.
# NOTE(review): stats::hclust documents "ward.D2" as the variant implementing
# Ward's criterion on non-squared distances; "ward.D" is kept here so the
# scores echoed below remain valid. Confirm which one is intended.
ward.optdigits <- NbClust(
  optdigits[, -65],
  method = "ward.D",
  index = "silhouette",
  max.nc = 10
)

# table(Best.partition) counts the observations assigned to each cluster, so
# the bars are cluster sizes (the labels previously described a vote count
# over criteria, which is not what is plotted here).
barplot(table(ward.optdigits$Best.partition),
        xlab = "Cluster", ylab = "Number of observations",
        main = "Cluster sizes (CAH Ward)")

# Individuals on the first two principal components, coloured by cluster.
plot(pca.optdigits$ind$coord, col = ward.optdigits$Best.partition,
     main = "CAH Ward sur optdigits")

# Agreement between the partition and the true classes.
NMI(ward.optdigits$Best.partition, optdigits$class)

## [1] 0.7967357

ARI(ward.optdigits$Best.partition, optdigits$class)

## [1] 0.7263758

# Hierarchical clustering (average linkage) through NbClust on the 64 feature
# columns, using the silhouette index with at most 10 clusters.
average.optdigits <- NbClust(
  optdigits[, -65],
  method = "average",
  index = "silhouette",
  max.nc = 10
)

# table(Best.partition) counts the observations assigned to each cluster, so
# the bars are cluster sizes (the labels previously described a vote count
# over criteria, which is not what is plotted here).
barplot(table(average.optdigits$Best.partition),
        xlab = "Cluster", ylab = "Number of observations",
        main = "Cluster sizes (CAH average)")

# Individuals on the first two principal components, coloured by cluster.
plot(pca.optdigits$ind$coord, col = average.optdigits$Best.partition,
     main = "CAH average sur optdigits")

# Agreement between the partition and the true classes.
NMI(average.optdigits$Best.partition, optdigits$class)

## [1] 0.0001767741

ARI(average.optdigits$Best.partition, optdigits$class)

## [1] -1.256398e-06

# Hierarchical clustering (single linkage) through NbClust on the 64 feature
# columns, using the silhouette index with at most 10 clusters.
single.optdigits <- NbClust(
  optdigits[, -65],
  method = "single",
  index = "silhouette",
  max.nc = 10
)

# table(Best.partition) counts the observations assigned to each cluster, so
# the bars are cluster sizes (the labels previously described a vote count
# over criteria, which is not what is plotted here).
barplot(table(single.optdigits$Best.partition),
        xlab = "Cluster", ylab = "Number of observations",
        main = "Cluster sizes (CAH single)")

# Individuals on the first two principal components, coloured by cluster.
plot(pca.optdigits$ind$coord, col = single.optdigits$Best.partition,
     main = "CAH single sur optdigits")

# Agreement between the partition and the true classes.
NMI(single.optdigits$Best.partition, optdigits$class)

## [1] 0.0001791119

ARI(single.optdigits$Best.partition, optdigits$class)

## [1] 1.136574e-06

# Hierarchical clustering (complete linkage) through NbClust on the 64
# feature columns, using the silhouette index with at most 10 clusters.
complete.optdigits <- NbClust(
  optdigits[, -65],
  method = "complete",
  index = "silhouette",
  max.nc = 10
)

# Cluster sizes of the complete-linkage partition. This previously tabulated
# single.optdigits (copy-paste slip), so the chart showed the wrong model.
# table(Best.partition) counts observations per cluster, hence the labels.
barplot(table(complete.optdigits$Best.partition),
        xlab = "Cluster", ylab = "Number of observations",
        main = "Cluster sizes (CAH complete)")

# Individuals on the first two principal components, coloured by cluster.
plot(pca.optdigits$ind$coord, col = complete.optdigits$Best.partition,
     main = "CAH complete sur optdigits")

# Agreement between the partition and the true classes.
NMI(complete.optdigits$Best.partition, optdigits$class)

## [1] 0.5483787

ARI(complete.optdigits$Best.partition, optdigits$class)

## [1] 0.3607227