Untitled

1) Illustration 1

Données : 6 individus ω1..ω6 avec deux variables X1, X2. Centres initiaux c1⁰=(-1,-1), c2⁰=(2,3).

# Données (ω1..ω6)
library(knitr)
Data <- data.frame(
  id = paste0("w", 1:6),
  X1 = c(-2, -2,  0, 2, -2, 3),
  X2 = c( 2, -1, -1, 2,  3, 0)
)
Data

##   id X1 X2
## 1 w1 -2  2
## 2 w2 -2 -1
## 3 w3  0 -1
## 4 w4  2  2
## 5 w5 -2  3
## 6 w6  3  0

# 1) Nuage de points
plot(Data$X1, Data$X2, pch=19, xlab="X1", ylab="X2")
text(Data$X1, Data$X2, labels=Data$id, pos=3)

# 2) Tableau des distances aux centres initiaux
c1_0 <- c(-1, -1)
c2_0 <- c( 2,  3)

dist_to_center <- function(df, center){
  sqrt((df$X1 - center[1])^2 + (df$X2 - center[2])^2)
}

D0 <- rbind(
  c1_0 = dist_to_center(Data, c1_0),
  c2_0 = dist_to_center(Data, c2_0)
)
colnames(D0) <- Data$id
round(D0, 2)

##        w1   w2   w3   w4   w5   w6
## c1_0 3.16 1.00 1.00 4.24 4.12 4.12
## c2_0 4.12 5.66 4.47 1.00 4.00 3.16

# 3) Affectation aux clusters (plus proche centre) + nouveaux centres de gravité
assign_clusters <- function(D){
  # D: matrice 2 x n, lignes = centres
  apply(D, 2, which.min)
}

cl0 <- assign_clusters(D0)
cl0

## w1 w2 w3 w4 w5 w6 
##  1  1  1  2  2  2

# Indices / individus par cluster
A0 <- Data[cl0==1, ]
B0 <- Data[cl0==2, ]

A0$id

## [1] "w1" "w2" "w3"

B0$id

## [1] "w4" "w5" "w6"

# Centres de gravité
c1_1 <- colMeans(A0[, c("X1","X2")])
c2_1 <- colMeans(B0[, c("X1","X2")])

c1_1

##        X1        X2 
## -1.333333  0.000000

c2_1

##       X1       X2 
## 1.000000 1.666667

# 4) Distances aux nouveaux centres c1¹ et c2¹
D1 <- rbind(
  c1_1 = dist_to_center(Data, c1_1),
  c2_1 = dist_to_center(Data, c2_1)
)
colnames(D1) <- Data$id
round(D1, 2)

##        w1   w2   w3   w4   w5   w6
## c1_1 2.11 1.20 1.67 3.89 3.07 4.33
## c2_1 3.02 4.01 2.85 1.05 3.28 2.60

# 5) Nouvelle affectation + critère d'arrêt (stabilité des clusters)
cl1 <- assign_clusters(D1)

A1 <- Data[cl1==1, ]
B1 <- Data[cl1==2, ]

list(A0=A0$id, B0=B0$id, A1=A1$id, B1=B1$id)

## $A0
## [1] "w1" "w2" "w3"
## 
## $B0
## [1] "w4" "w5" "w6"
## 
## $A1
## [1] "w1" "w2" "w3" "w5"
## 
## $B1
## [1] "w4" "w6"

# Vérifier si Cluster1 et Cluster2 sont identiques à l'étape précédente
identiques_cluster1 <- setequal(A0$id, A1$id)
identiques_cluster2 <- setequal(B0$id, B1$id)

c(Cluster1_identique = identiques_cluster1,
  Cluster2_identique = identiques_cluster2)

## Cluster1_identique Cluster2_identique 
##              FALSE              FALSE

# 6) K-means R "direct" (comme dans le PDF) avec centres imposés
m <- as.matrix(Data[, c("X1","X2")])
clus <- kmeans(m, centers = rbind(c(-1,-1), c(2,3)), algorithm="Lloyd")

clus$cluster

## [1] 1 1 1 2 1 2

clus$centers

##     X1   X2
## 1 -1.5 0.75
## 2  2.5 1.00

plot(m, col=clus$cluster, pch=19, xlab="X1", ylab="X2")
points(clus$centers, pch=8, cex=1.5)

Untitled

Auxane BIKOUTA

2026-01-23

1) Illustration 1