(2)

(a) Plot the Observations

# Six observations in two features; Obs is the label drawn next to each point.
x1_values <- c(1, 1, 0, 5, 6, 4)
x2_values <- c(4, 3, 4, 1, 2, 0)
data <- data.frame(Obs = 1:6, X1 = x1_values, X2 = x2_values)

# Scatter plot of the raw observations. The axis limits are padded beyond
# the data range so the labels placed above each point (pos = 3) stay visible.
with(data, {
  plot(X1, X2,
       pch = 19, xlab = "X1", ylab = "X2",
       main = "Initial Observations",
       xlim = c(-1, 7), ylim = c(-1, 5))
  text(X1, X2, labels = Obs, pos = 3)
})

(b) Random Initial Cluster Assignment

set.seed(123) # For reproducibility
# Draw one random label (1 or 2) per observation. Sizing the draw with
# nrow(data) rather than a literal 6 keeps the label vector aligned with
# the data frame if observations are added or removed.
initial_clusters <- sample(1:2, size = nrow(data), replace = TRUE)
# Both clusters must receive at least one observation; otherwise one of the
# two centroids cannot be computed in the next step.
stopifnot(length(unique(initial_clusters)) == 2)
data$Cluster <- initial_clusters
print(data[, c("Obs", "Cluster")])
##   Obs Cluster
## 1   1       1
## 2   2       1
## 3   3       1
## 4   4       2
## 5   5       1
## 6   6       2

(c) Compute Centroids

# Centroid of each cluster = per-cluster mean of X1 and X2.
# The result has one row per cluster with columns Cluster, X1, X2.
centroids <- aggregate(cbind(X1, X2) ~ Cluster, data = data, FUN = mean)
print(centroids)
##   Cluster  X1   X2
## 1       1 2.0 3.25
## 2       2 4.5 0.50

(d) Reassign Observations to Nearest Centroid

# Euclidean distance from every observation to each of the two centroids
# (row 1 of `centroids` holds cluster 1, row 2 holds cluster 2).
dist_to_c1 <- with(
  data,
  sqrt((X1 - centroids$X1[1])^2 + (X2 - centroids$X2[1])^2)
)
dist_to_c2 <- with(
  data,
  sqrt((X1 - centroids$X1[2])^2 + (X2 - centroids$X2[2])^2)
)

# An observation joins cluster 1 only when it is strictly closer to
# centroid 1; ties and everything else go to cluster 2.
data$NewCluster <- ifelse(dist_to_c2 > dist_to_c1, 1, 2)
print(data[, c("Obs", "NewCluster")])
##   Obs NewCluster
## 1   1          1
## 2   2          1
## 3   3          1
## 4   4          2
## 5   5          2
## 6   6          2
# TRUE when no observation switched cluster, i.e. the assignment is stable
!any(data$Cluster != data$NewCluster)
## [1] FALSE

(e) Iterate Until Convergence

Repeat (c) and (d) until cluster assignments stop changing. Typically converges in 2-3 iterations for this small dataset.

After repeating (c) and (d) once more, check whether the cluster assignments changed:

  • The cluster assignments have not changed.

  • Thus we can conclude that the clusters have stabilized.

  • Final centroids are:

  • Cluster 1 centroid = [0.67, 3.67]

  • Cluster 2 centroid = [5, 1]

  • In summary, the clusters have stabilized.

  • The final centroids are:

    • Cluster 1: (0.67, 3.67) - mean of obs 1,2,3

    • Cluster 2: (5, 1) - mean of obs 4,5,6

(f) Final Clustering Plot

# Converged assignment: observations 1-3 form cluster 1 and observations
# 4-6 form cluster 2.
final_clusters <- c(1, 1, 1, 2, 2, 2)

# Reuse the axis limits of the initial plot. With the default (data-range)
# limits, the labels drawn above the points at X2 = 4 (pos = 3) are clipped
# at the top of the plot region and the legend has little free space.
plot(data$X1, data$X2, pch = 19, col = final_clusters,
     xlab = "X1", ylab = "X2", main = "Final Clustering",
     xlim = c(-1, 7), ylim = c(-1, 5))
text(data$X1, data$X2, labels = data$Obs, pos = 3)
# Legend colors 1:2 match the cluster labels used for `col` above.
legend("topright", legend = c("Cluster 1", "Cluster 2"),
       col = 1:2, pch = 19)