On travaille sur le jeu de données iris.
# Show the structure of the built-in iris data frame.
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Drop the Species factor so that only the four numeric measurement columns
# remain; this reduced table is the input of the analysis below.
myDF <- subset(iris, select= -Species)
# Check the structure of the reduced data frame.
str(myDF)
## 'data.frame': 150 obs. of 4 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
On utilise le package ade4 pour l’AFC et sa représentation, et le package FactoClass pour la CAH avec la distance du \(\chi^2\).
library(ade4)
library(FactoClass)
## Loading required package: xtable
# Correspondence analysis (AFC) of the measurement table. nf = ncol(myDF)
# requests as many axes as there are columns, so the nf setting itself does
# not discard any axis.
# NOTE(review): scannf = FALSE presumably disables the interactive choice of
# the number of axes - confirm against the ade4 documentation.
AFC <- dudi.coa(df = myDF, scannf = FALSE, nf = ncol(myDF))
# Default factorial-plane plot of the analysis.
plot.dudi(AFC)
# Distances between the rows of the analysis (amongrow = TRUE).
distMat <- dist.dudi(AFC, amongrow = TRUE)
# Ward hierarchical clustering of the rows, each row weighted by its total.
# rowSums() gives the same per-row totals as apply(myDF, 1, sum) without the
# generic row-by-row apply() over a data frame.
# NOTE(review): h.clust = 1 is presumably what makes ward.cluster() return the
# tree object that cutree() and as.dendrogram() consume later - confirm.
CAH <- ward.cluster(distMat, peso = rowSums(myDF), plots = TRUE, h.clust = 1)
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"
# Share of the summed node heights carried by each node, largest first.
node_share <- sort(CAH$height / sum(CAH$height), decreasing = TRUE)
# Only the 15 largest nodes are displayed.
top_nodes <- 1:15

# Two panels side by side: per-node share (left), cumulative share (right).
par(mfrow = c(1, 2))
barplot(
  node_share[top_nodes] * 100,
  names.arg = top_nodes,
  main = "Inertie selon le partitionnement",
  xlab = "Noeuds",
  ylab = "Part de l'inertie totale (%)"
)
barplot(
  cumsum(node_share)[top_nodes] * 100,
  names.arg = top_nodes,
  main = "Inertie expliquée",
  xlab = "Nombre de classes",
  ylab = "Part de l'inertie totale (%)"
)
# Back to a single-panel layout for the plots that follow.
par(mfrow = c(1, 1))
# Dendrogram of the hierarchical clustering, drawn without leaf labels.
plot(as.dendrogram(CAH), leaflab = "none")
# Cut the tree into 3 clusters and store each row's cluster id in myDF.
myDF$clusters <- cutree(tree = CAH, k = 3)
# Rows of the analysis (AFC$li) on the factorial plane, grouped by cluster:
# one colour and one numeric label per cluster.
s.class(cstar = 1, addaxes = TRUE, grid = TRUE, axesell = TRUE,
        dfxy = AFC$li, fac = as.factor(myDF$clusters), col = 1:3,
        label = c(1:3), csub = 1.2, possub = "bottomright")
# Same plane showing the columns only (Tcol = TRUE, Trow = FALSE), with the
# clusters overlaid on top of it.
plot.dudi(AFC, Tcol = TRUE, Trow = FALSE)
# add.plot is spelled out in full: the abbreviated `add` only resolved through
# partial argument matching, and only because addaxes is matched exactly first.
s.class(cstar = 1, addaxes = TRUE, grid = FALSE, axesell = TRUE,
        dfxy = AFC$li, fac = as.factor(myDF$clusters), col = 1:3,
        label = c(1:3), csub = 1.2, possub = "bottomright", add.plot = TRUE)
# Load the A2R plotting functions written by Romain François -
# http://blog.r-enthusiasts.com/
# NOTE(review): this downloads and executes remote code over plain HTTP on
# every run; consider keeping a reviewed local copy of the script instead.
source("http://addictedtor.free.fr/packages/A2R/lastVersion/R/code.R")
# Cluster ids in the order in which they first appear along the dendrogram
# leaves (CAH$order); used to pick the colour of each lower branch group.
# The column is referenced by its full name: `myDF$cluster` only worked
# through partial matching of `$` on the `clusters` column.
ordreClasses <- unique(myDF$clusters[CAH$order])
# Coloured dendrogram: grey above the cut, one colour per cluster below it.
A2Rplot(x = CAH, k = 3, boxes = FALSE, col.up = "gray50",
        col.down = c(1:3)[ordreClasses], show.labels = FALSE,
        main = "Dendrogramme")