library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cluster)

Première simulation

Simulation et graph des données

# Candidate location and spread values; every sample below picks one of each
# at random.
# Note: rnorm() takes a standard deviation as its third argument, so the
# values stored in `var` are used as standard deviations here.
moyenne <- c(0, 1, 3, 5, 7, 9, 10, 12)
var <- c(1, 3, 5, 7, 8, 9, 11, 13)

# Draw 100 normal values with a randomly chosen mean and spread.
draw_sample <- function() {
  rnorm(100, mean = sample(moyenne, 1), sd = sample(var, 1))
}

# Each coordinate is the concatenation of three independent samples
# (300 values per coordinate).
test123 <- c(draw_sample(), draw_sample(), draw_sample())
test456 <- c(draw_sample(), draw_sample(), draw_sample())

# Two-column data set used by the rest of this simulation.
table <- data.frame(test123, test456)
plot(table, main = "Graph des Datas", xlab = "", ylab = "")

Gap Statistique

# Gap statistic of the simulated data for 1 to 20 clusters, with k-means as
# the clustering routine.
GAP <- clusGap(table, FUNcluster = kmeans, K.max = 20)
plot(GAP, main = "Courbe du Gap")

# Keep the result table; its logW and E.logW columns are plotted afterwards.
TabTable <- GAP$Tab

Graph des W, logW et E.logW

# Convert the gap table to a data frame so its columns can be read with `$`.
TabTable <- data.frame(TabTable)

# One x position per row of the gap table (one row per number of clusters).
xx <- seq_len(nrow(TabTable))
y1 <- TabTable$logW
y2 <- TabTable$E.logW

# W itself, obtained by undoing the logarithm of the logW column.
plot(exp(y1), col = "blue", ylab = "W", xlab = "clusters", main = "Fonction W")

# The means and spreads of this simulation are drawn at random, so the scale
# of logW changes from run to run: a fixed ylim can push points off the plot.
# The y range is therefore taken from the two curves being drawn.
plot(xx, y1, ylim = range(y1, y2), col = "red", ylab = "", xlab = "clusters",
     main = "logW et E.logW")
points(xx, y2, col = "green")

K-Means avec k optimal (ici k=1)

# k-means with a single cluster: every observation gets the same colour.
res_table <- kmeans(table, centers = 1)
table_res <- data.frame(table, res_table$cluster) %>%
  mutate(couleurs = "red")

centres_table <- res_table$centers

# Scatter of the two coordinates, with the cluster centre drawn as a diamond.
plot(table_res$test123, table_res$test456,
     col = table_res$couleurs, xlab = "", ylab = "")
points(centres_table, col = "black", pch = 5, lwd = 3)

Deuxième simulation

Simulation et graph des données

# Two groups of 75 points in two dimensions: one centred on 0, one on 6,
# both with unit standard deviation.
group_low <- matrix(rnorm(150, sd = 1), ncol = 2)
group_high <- matrix(rnorm(150, mean = 6, sd = 1), ncol = 2)

# Stack the groups; the unnamed matrix columns become X1 and X2.
table <- data.frame(rbind(group_low, group_high))
plot(table, main = "Graph des Datas", xlab = "", ylab = "")

Gap Statistique

# Gap statistic with k-means, evaluated for up to 20 clusters.
GAP <- clusGap(table, FUNcluster = kmeans, K.max = 20)

# Result table, reused for the logW / E.logW plots.
TabTable <- GAP[["Tab"]]

plot(GAP, main = "Courbe du Gap")

Graph des W, logW et E.logW

# Data-frame version of the gap table, so columns are reachable by name.
TabTable <- data.frame(TabTable)

xx <- seq(1, 20, length.out = 20)
y1 <- TabTable$logW
y2 <- TabTable$E.logW

# W itself: exponential of the logW column, plotted against its index.
plot(exp(y1), main = "Fonction W", xlab = "clusters", ylab = "W",
     col = "blue")

# logW in red, with E.logW added in green on the same axes.
plot(xx, y1, main = "logW et E.logW", xlab = "clusters", ylab = "",
     ylim = c(3, 6), col = "red")
points(xx, y2, col = "green")

K-Means avec k optimal (ici k=2)

# k-means with two clusters; the cluster label is appended as the column
# `res_table.cluster`.
res_table <- kmeans(table, centers = 2)
table_res <- data.frame(table, res_table$cluster)

# Cluster 1 is drawn in red, the other cluster in blue.
table_res <- mutate(
  table_res,
  couleurs = ifelse(res_table.cluster == 1, "red", "blue")
)

centres_table <- res_table$centers

# Coloured scatter plot with the two cluster centres drawn as diamonds.
plot(table_res$X1, table_res$X2,
     col = table_res$couleurs, xlab = "", ylab = "")
points(centres_table, col = "black", pch = 5, lwd = 3)

Troisième simulation

Simulation et graph des données

# Four tight groups of 50 points in three dimensions, centred on 0, 1, 2
# and 3 (standard deviation 0.1), generated in that order and stacked.
group_means <- c(0, 1, 2, 3)
groups <- lapply(group_means, function(m) {
  matrix(rnorm(150, mean = m, sd = 0.1), ncol = 3)
})

# The unnamed matrix columns become X1, X2 and X3.
table <- data.frame(do.call(rbind, groups))
plot(table, main = "Graph des Datas")

Gap Statistique

# Gap statistic (k-means, 1 to 20 clusters) for the three-dimensional data.
GAP <- clusGap(
  table,
  FUNcluster = kmeans,
  K.max = 20
)
TabTable <- GAP$Tab

# Gap curve.
plot(GAP, main = "Courbe du Gap")

Graph des W, logW et E.logW

# Switch the gap table to a data frame to access its columns by name.
TabTable <- data.frame(TabTable)

# x positions 1..20 and the two curves taken from the gap table.
xx <- seq(1, 20, length.out = 20)
y1 <- TabTable[["logW"]]
y2 <- TabTable[["E.logW"]]

# First figure: W (exponential of logW) against its index.
plot(exp(y1), xlab = "clusters", ylab = "W", main = "Fonction W",
     col = "blue")

# Second figure: logW in red, then E.logW overlaid in green.
plot(xx, y1, xlab = "clusters", ylab = "", main = "logW et E.logW",
     col = "red", ylim = c(0, 6))
points(xx, y2, col = "green")

K-Means avec k optimal (ici k=6)

# k-means with six clusters; the cluster label is appended as the column
# `res_table.cluster`.
res_table <- kmeans(table, 6)
table_res <- data.frame(table, res_table$cluster)

# One colour per cluster label (1..6), looked up directly by label.
cluster_colours <- c("red", "#33FFFF", "green", "#FFCC00", "#990099", "#999999")
table_res <- table_res %>%
  mutate(couleurs = cluster_colours[res_table.cluster])

centres_table <- res_table$centers

# Axis pairs to draw, as (x, y), in the same order as the original figures.
# NOTE(review): the third pair repeats the first (X1 vs X2); it is kept so
# the sequence of figures is unchanged.
axis_pairs <- list(
  c("X1", "X2"),
  c("X1", "X3"),
  c("X1", "X2"),
  c("X2", "X3"),
  c("X3", "X1"),
  c("X3", "X2")
)

for (axes in axis_pairs) {
  plot(table_res[[axes[1]]], table_res[[axes[2]]],
       col = table_res$couleurs, ylab = "", xlab = "")
  # points() reads x and y from the first two columns of a matrix, so the
  # centres are selected by name, in the same (x, y) order as the plot.
  # Positional drops such as [, -2] put the centres on the wrong axes.
  points(centres_table[, axes, drop = FALSE], lwd = 4, pch = 4, col = "black")
}

Quatrième simulation

Simulation et graph des données

# Load the iris data set and keep its first four columns (the sepal and
# petal measurements).
data("iris")
table <- data.frame(iris[, 1:4])

# Pairwise scatter plots of the four measurements.
plot(table, main = "Graph des Datas")

Gap Statistique

# Gap statistic on the iris measurements: k-means, 1 to 20 clusters.
GAP <- clusGap(table, FUNcluster = kmeans, K.max = 20)
plot(GAP, main = "Courbe du Gap")

# Table of results used by the following plots.
TabTable <- GAP[["Tab"]]

Graph des W, logW et E.logW

# Gap table as a data frame, for name-based column access.
TabTable <- data.frame(TabTable)

xx <- seq(1, 20, length.out = 20)
y1 <- TabTable$logW
y2 <- TabTable$E.logW

# W recovered from logW.
plot(
  exp(y1),
  col = "blue", main = "Fonction W",
  xlab = "clusters", ylab = "W"
)

# logW (red) and E.logW (green) on a common scale.
plot(
  xx, y1,
  col = "red", main = "logW et E.logW",
  xlab = "clusters", ylab = "", ylim = c(0, 6)
)
points(xx, y2, col = "green")

### K-Means avec k optimal (ici k=4 ; pas forcément 4 en fait, mais c’est le début du « coude »)

# k-means with four clusters on the iris measurements; the cluster label is
# appended as the column `res_table.cluster`.
res_table <- kmeans(table, 4)
table_res <- data.frame(table, res_table$cluster)

# One colour per cluster label (1..4), looked up directly by label.
cluster_colours <- c("red", "#33FFFF", "#990099", "#999999")
table_res <- table_res %>%
  mutate(couleurs = cluster_colours[res_table.cluster])
centres_table <- res_table$centers

# Sepal view. The axis labels follow the plotted variables: length on x,
# width on y (they were previously swapped).
plot(table_res$Sepal.Length, table_res$Sepal.Width, col = table_res$couleurs,
     xlab = "Sepal.Length", ylab = "Sepal.Width")
# Centres are selected by name so they always match the plotted axes.
points(centres_table[, c("Sepal.Length", "Sepal.Width"), drop = FALSE],
       lwd = 4, pch = 4, col = "black")

# Petal view, with the same label correction.
plot(table_res$Petal.Length, table_res$Petal.Width, col = table_res$couleurs,
     xlab = "Petal.Length", ylab = "Petal.Width")
points(centres_table[, c("Petal.Length", "Petal.Width"), drop = FALSE],
       lwd = 4, pch = 4, col = "black")

Cinquième simulation

Simulation et graph des données

# 75 points uniform on the square [0, 10] x [0, 10], followed by 75 points
# uniform on the centred sub-square [2.5, 7.5] x [2.5, 7.5].
outer_square <- matrix(runif(150, min = 0, max = 10), ncol = 2)
inner_square <- matrix(runif(150, min = 2.5, max = 7.5), ncol = 2)

# Stack both sets; the unnamed matrix columns become X1 and X2.
table <- data.frame(rbind(outer_square, inner_square))
plot(table)

Gap Statistique

# Gap statistic for the uniform data: k-means, 1 to 20 clusters.
GAP <- clusGap(table, FUNcluster = kmeans, K.max = 20)

# Gap curve first, then keep the result table for the next figures.
plot(GAP, main = "Courbe du Gap")
TabTable <- GAP$Tab

Graph des W, logW et E.logW

# Data-frame copy of the gap table.
TabTable <- data.frame(TabTable)

# Cluster counts on the x axis; logW and E.logW on the y axis.
xx <- seq(1, 20, length.out = 20)
y1 <- TabTable[["logW"]]
y2 <- TabTable[["E.logW"]]

# W, i.e. exp(logW), against its index.
plot(exp(y1), ylab = "W", xlab = "clusters", col = "blue",
     main = "Fonction W")

# logW drawn in red; E.logW added in green.
plot(xx, y1, ylab = "", xlab = "clusters", col = "red",
     ylim = c(0, 6), main = "logW et E.logW")
points(xx, y2, col = "green")

K-Means avec k optimal (ici k=1)

# Single-cluster k-means: all points share one label and one colour.
res_table <- kmeans(table, centers = 1)
centres_table <- res_table$centers

table_res <- data.frame(table, res_table$cluster)
table_res <- mutate(table_res, couleurs = "red")

# Scatter plot of the data with the centre marked as a diamond.
plot(table_res$X1, table_res$X2,
     col = table_res$couleurs, xlab = "", ylab = "")
points(centres_table, col = "black", pch = 5, lwd = 3)

Sixième simulation

Simulation et graph des données

# 75 points uniform on [0, 10] x [0, 10], followed by 75 points uniform on
# the corner sub-square [0, 5] x [0, 5].
full_square <- matrix(runif(150, min = 0, max = 10), ncol = 2)
corner_square <- matrix(runif(150, min = 0, max = 5), ncol = 2)

# Stack both sets; the unnamed matrix columns become X1 and X2.
table <- data.frame(rbind(full_square, corner_square))
plot(table)

Gap Statistique

# Gap statistic (k-means, up to 20 clusters) for the last simulated data set.
GAP <- clusGap(
  table,
  FUNcluster = kmeans,
  K.max = 20
)

# Keep the result table, then draw the gap curve.
TabTable <- GAP[["Tab"]]
plot(GAP, main = "Courbe du Gap")

Graph des W, logW et E.logW

# Make the gap table a data frame so `$` can be used on its columns.
TabTable <- data.frame(TabTable)

xx <- seq(1, 20, length.out = 20)
y1 <- TabTable$logW
y2 <- TabTable$E.logW

# Figure 1: W = exp(logW).
plot(exp(y1), main = "Fonction W", col = "blue",
     xlab = "clusters", ylab = "W")

# Figure 2: logW (red points) and E.logW (green points).
plot(xx, y1, main = "logW et E.logW", col = "red",
     xlab = "clusters", ylab = "", ylim = c(0, 6))
points(xx, y2, col = "green")

# Two-cluster k-means; the label is appended as `res_table.cluster`.
res_table <- kmeans(table, centers = 2)
centres_table <- res_table$centers

# Cluster 1 in red, the other cluster in blue.
table_res <- data.frame(table, res_table$cluster) %>%
  mutate(couleurs = ifelse(res_table.cluster == 1, "red", "blue"))

# No xlab/ylab is given, so the axis labels come from the two argument
# expressions below; they are kept exactly as written for that reason.
plot(table_res$X1, table_res$X2, col = table_res$couleurs)
points(centres_table, col = "black", pch = 5, lwd = 3)