# ---- Load and inspect the data ----
# Reads framingham.csv from the current working directory.
heart <- read.csv("framingham.csv")
str(heart)
summary(heart)
colSums(is.na(heart))  # number of missing values per column

# ---- Build the numeric feature matrix used for clustering ----
# vapply() (rather than sapply()) guarantees a logical vector even when the
# data frame has zero columns.
num_cols <- names(heart)[vapply(heart, is.numeric, logical(1))]
heart_num <- heart[, num_cols, drop = FALSE]

# Exclude TenYearCHD from the clustering features when it is present.
# NOTE(review): presumably this is the outcome label -- confirm.
if ("TenYearCHD" %in% names(heart_num)) {
  heart_num$TenYearCHD <- NULL
}

# Keep complete rows only; dist(), kmeans() and prcomp() below are all run on
# this NA-free data.
heart_num <- na.omit(heart_num)
dim(heart_num)
summary(heart_num)

# Centre every column to mean 0 and scale to unit standard deviation so that
# no single variable dominates the Euclidean distances.
X <- scale(heart_num)
# ---- Hierarchical clustering (Ward.D2 linkage on Euclidean distances) ----
d <- dist(X, method = "euclidean")
hc <- hclust(d, method = "ward.D2")

# Full dendrogram; observation labels are suppressed and all leaves are drawn
# at the baseline (hang = -1).
plot(
  hc,
  labels = FALSE,
  hang = -1,
  main = "Hierarchical Clustering Dendrogram (Ward.D2)"
)

# Number of clusters; also read by the k-means and PCA sections of the script.
K <- 3
cl_hc <- cutree(hc, k = K)

# Cluster sizes.
table(cl_hc)

# Same dendrogram with the K clusters outlined.
plot(
  hc,
  labels = FALSE,
  hang = -1,
  main = paste0("Dendrogram with Cut (K =", K, ")")
)
rect.hclust(hc, k = K, border = 2:6)

# Per-cluster means of the original (unscaled) variables.
heart_profile <- heart_num
heart_profile$cluster <- factor(cl_hc)
aggregate(. ~ cluster, data = heart_profile, FUN = mean)
# ---- K-means: elbow plot ----
# kmeans() uses random starting centres, so seed the generator to make the
# elbow curve reproducible between runs.
set.seed(937)
wss <- vapply(
  1:10,
  function(k) {
    kmeans(X, centers = k, nstart = 20)$tot.withinss
  },
  numeric(1)
)

plot(
  1:10,
  wss,
  type = "b",
  xlab = "Number of clusters (k)",
  ylab = "Total Within-Cluster Sum of Squares",
  main = "Elbow Method for K-means"
)

# ---- K-means: final fit with K clusters ----
# Re-seed immediately before the fit so the result does not depend on how many
# random numbers the elbow loop consumed.
set.seed(937)
km <- kmeans(X, centers = K, nstart = 50)

# Cluster sizes.
table(km$cluster)

# Per-cluster means of the original (unscaled) variables.
heart_profile_km <- heart_num
heart_profile_km$cluster <- factor(km$cluster)
aggregate(. ~ cluster, data = heart_profile_km, FUN = mean)
# ---- PCA projection of both clusterings ----
# X is used as-is; prcomp() centres it again by default, which is a no-op on
# data that scale() has already centred.
pca <- prcomp(X)

# First two principal-component scores alongside both cluster assignments.
pca_df <- data.frame(
  PC1 = pca$x[, 1],
  PC2 = pca$x[, 2],
  HC_Cluster = factor(cl_hc),
  KM_Cluster = factor(km$cluster)
)

# Scatter plot of PC1 vs PC2 coloured by a cluster factor, with a legend.
#   scores   data frame with numeric columns PC1 and PC2
#   clusters factor of cluster labels, one per row of `scores`
#   title    main title of the plot
# Colours are the integer codes of the factor levels, so the points and the
# legend use the same palette indices.
plot_pca_clusters <- function(scores, clusters, title) {
  plot(
    scores$PC1,
    scores$PC2,
    col = as.integer(clusters),
    pch = 16,
    xlab = "PC1",
    ylab = "PC2",
    main = title
  )
  legend(
    "topright",
    legend = levels(clusters),
    col = seq_along(levels(clusters)),
    pch = 16,
    bty = "n"
  )
  invisible(NULL)
}

plot_pca_clusters(
  pca_df,
  pca_df$HC_Cluster,
  paste0("PCA Projection – Hierarchical Clustering (K=", K, ")")
)

plot_pca_clusters(
  pca_df,
  pca_df$KM_Cluster,
  paste0("PCA Projection – K-means Clustering (K=", K, ")")
)