diss_matrix <- matrix(c(
0, 0.3, 0.4, 0.7,
0.3, 0, 0.5, 0.8,
0.4, 0.5, 0, 0.45,
0.7, 0.8, 0.45, 0
), nrow = 4, byrow = TRUE)
rownames(diss_matrix) <- colnames(diss_matrix) <- paste0("Obs", 1:4)
hc_complete <- hclust(as.dist(diss_matrix), method = "complete")
plot(hc_complete, main = "Complete Linkage Dendrogram", ylab = "Height")
hc_single <- hclust(as.dist(diss_matrix), method = "single")
plot(hc_single, main = "Single Linkage Dendrogram", ylab = "Height")
cutree(hc_complete, k = 2)
## Obs1 Obs2 Obs3 Obs4
## 1 1 2 2
cutree(hc_single, k = 2)
## Obs1 Obs2 Obs3 Obs4
## 1 1 1 2
plot(hc_complete, hang = -1, main = "Alternate Complete Linkage Dendrogram")
data <- data.frame(
Obs = 1:6,
X1 = c(1, 1, 0, 5, 6, 4),
X2 = c(4, 3, 4, 1, 2, 0)
)
plot(data$X1, data$X2, xlab = "X1", ylab = "X2", pch = 19, main = "Observations")
set.seed(123)
data$Cluster <- sample(1:2, size = nrow(data), replace = TRUE)
cat("Initial Cluster Assignments:\n")
## Initial Cluster Assignments:
print(data)
## Obs X1 X2 Cluster
## 1 1 1 4 1
## 2 2 1 3 1
## 3 3 0 4 1
## 4 4 5 1 2
## 5 5 6 2 1
## 6 6 4 0 2
c1 <- data[data$Cluster == 1, ]
c2 <- data[data$Cluster == 2, ]
# Centroids
c1_x <- mean(c1$X1)
c1_y <- mean(c1$X2)
c2_x <- mean(c2$X1)
c2_y <- mean(c2$X2)
cat("Centroid of Cluster 1:", c1_x, c1_y, "\n")
## Centroid of Cluster 1: 2 3.25
cat("Centroid of Cluster 2:", c2_x, c2_y, "\n")
## Centroid of Cluster 2: 4.5 0.5
data$Cluster <- apply(data[, c("X1", "X2")], 1, function(row) {
d1 <- sqrt((row[1] - c1_x)^2 + (row[2] - c1_y)^2)
d2 <- sqrt((row[1] - c2_x)^2 + (row[2] - c2_y)^2)
ifelse(d1 < d2, 1, 2)
})
print(data)
## Obs X1 X2 Cluster
## 1 1 1 4 1
## 2 2 1 3 1
## 3 3 0 4 1
## 4 4 5 1 2
## 5 5 6 2 2
## 6 6 4 0 2
repeat {
old_cluster <- data$Cluster
c1 <- data[data$Cluster == 1, ]
c2 <- data[data$Cluster == 2, ]
c1_x <- mean(c1$X1)
c1_y <- mean(c1$X2)
c2_x <- mean(c2$X1)
c2_y <- mean(c2$X2)
data$Cluster <- apply(data[, c("X1", "X2")], 1, function(row) {
d1 <- sqrt((row[1] - c1_x)^2 + (row[2] - c1_y)^2)
d2 <- sqrt((row[1] - c2_x)^2 + (row[2] - c2_y)^2)
ifelse(d1 < d2, 1, 2)
})
if (all(data$Cluster == old_cluster)) break
}
cat("Final cluster assignments after convergence:\n")
## Final cluster assignments after convergence:
print(data)
## Obs X1 X2 Cluster
## 1 1 1 4 1
## 2 2 1 3 1
## 3 3 0 4 1
## 4 4 5 1 2
## 5 5 6 2 2
## 6 6 4 0 2
plot(data$X1, data$X2, col = data$Cluster, pch = 19,
main = "Final Clusters (Manual K-Means)", xlab = "X1", ylab = "X2")
text(data$X1 + 0.2, data$X2, labels = data$Obs)
points <- matrix(c(
1, 4,
1, 3,
0, 4,
5, 1,
6, 2,
4, 0
), ncol = 2, byrow = TRUE)
rownames(points) <- paste0("Obs", 1:6)
points
## [,1] [,2]
## Obs1 1 4
## Obs2 1 3
## Obs3 0 4
## Obs4 5 1
## Obs5 6 2
## Obs6 4 0
hc_single <- hclust(dist(points), method = "single")
hc_complete <- hclust(dist(points), method = "complete")
par(mfrow = c(1, 2))
plot(hc_single, main = "Single Linkage", xlab = "", sub = "", cex = 0.8)
plot(hc_complete, main = "Complete Linkage", xlab = "", sub = "", cex = 0.8)
Since single linkage merges based on the shortest distance between clusters and complete linkage merges based on the furthest distance, the {1,2,3} and {4,5} merge will happen at a higher height in complete linkage.
dists <- as.matrix(dist(points))
dists["Obs5", "Obs6"]
## [1] 2.828427
They will fuse at the same height (2.83) in both single and complete linkage because Obs5 and Obs6 are individual points, and the only pairwise distance considered is 2.83.