# Load the dataset and take a first look at it
# "framingham.csv" is a relative path, so it is resolved against the current
# working directory.
heart <- read.csv("framingham.csv")

str(heart)             # column types and a preview of the values
summary(heart)         # per-column summary statistics
colSums(is.na(heart))  # number of missing values in each column

# Data preparation
# Keep only the numeric columns of `heart`. vapply() always returns a logical
# vector (sapply() can change its return type on unusual input), and
# drop = FALSE keeps the result a data frame even if one column is selected.
num_cols <- names(heart)[vapply(heart, is.numeric, logical(1))]
heart_num <- heart[, num_cols, drop = FALSE]

# Remove labels / outcomes
# TenYearCHD is dropped (when present) so that it is not part of the feature
# matrix built from `heart_num`.
if ("TenYearCHD" %in% names(heart_num)) {
  heart_num$TenYearCHD <- NULL
}

# Simple missing-value handling and standardization
# Drop every row that contains at least one NA.
heart_num <- na.omit(heart_num)

dim(heart_num)      # rows / columns left after removing incomplete rows
summary(heart_num)

# scale() with its defaults centers each column and divides it by its
# standard deviation; the result is a numeric matrix.
X <- scale(heart_num)

# Distance computation and linkage
# Pairwise Euclidean distances between the rows of the standardized matrix.
d <- dist(X, method = "euclidean")
# Agglomerative clustering with Ward's criterion ("ward.D2" variant).
hc <- hclust(d, method = "ward.D2")

# Dendrogram
plot(
  hc,
  labels = FALSE,  # do not print a label for every observation
  hang = -1,       # negative value: leaves hang down from height 0
  main = "Hierarchical Clustering Dendrogram (Ward.D2)"
)

# Choice of the number of clusters
K <- 3
# Cut the tree into K groups; cl_hc holds one cluster id per observation.
cl_hc <- cutree(hc, k = K)

table(cl_hc)  # cluster sizes

# Redraw the dendrogram and outline the K clusters on it.
plot(
  hc,
  labels = FALSE,
  hang = -1,
  main = paste0("Dendrogram with Cut (K =", K, ")")
)
rect.hclust(hc, k = K, border = 2:6)  # border colours for the rectangles

# Cluster profiles (hierarchical clustering)
# Attach the hierarchical cluster id to the unscaled numeric data so the
# profiles are expressed in the original units.
heart_profile <- heart_num
heart_profile$cluster <- factor(cl_hc)

# Mean of every variable within each cluster.
aggregate(. ~ cluster, data = heart_profile, FUN = mean)

# K-means clustering
# Elbow method: total within-cluster sum of squares for k = 1..10.
# kmeans() uses random starting centers, so the seed is fixed to make the
# curve reproducible between runs.
set.seed(937)
wss <- vapply(
  1:10,
  function(k) {
    kmeans(X, centers = k, nstart = 20)$tot.withinss
  },
  numeric(1)  # exactly one number per k, enforced by vapply()
)

plot(
  1:10, wss,
  type = "b",
  xlab = "Number of clusters (k)",
  ylab = "Total Within-Cluster Sum of Squares",
  main = "Elbow Method for K-means"
)

# Apply K-means
# Fix the seed so the random starts (and therefore the fit) are reproducible.
set.seed(937)
km <- kmeans(X, centers = K, nstart = 50)

table(km$cluster)  # cluster sizes

# K-means cluster profiles
# Attach the K-means cluster id to the unscaled numeric data so the profiles
# are expressed in the original units.
heart_profile_km <- heart_num
heart_profile_km$cluster <- factor(km$cluster)

# Mean of every variable within each cluster.
aggregate(. ~ cluster, data = heart_profile_km, FUN = mean)

# Cluster visualization with PCA
# X is already the output of scale(), so prcomp() is called with its defaults.
pca <- prcomp(X)

# Scores on the first two principal components together with both cluster
# assignments, one row per observation.
pca_df <- data.frame(
  PC1 = pca$x[, 1],
  PC2 = pca$x[, 2],
  HC_Cluster = factor(cl_hc),
  KM_Cluster = factor(km$cluster)
)

# Hierarchical clustering
# Scatter plot of PC1 vs PC2, coloured by the hierarchical cluster factor.
plot(
  pca_df$PC1, pca_df$PC2,
  col = pca_df$HC_Cluster,
  pch = 16,
  xlab = "PC1",
  ylab = "PC2",
  main = paste0("PCA Projection – Hierarchical Clustering (K=", K, ")")
)
# One legend entry per factor level, numbered 1..n in level order.
legend(
  "topright",
  legend = levels(pca_df$HC_Cluster),
  col = seq_along(levels(pca_df$HC_Cluster)),
  pch = 16,
  bty = "n"
)

# K-means clusters
# Scatter plot of PC1 vs PC2, coloured by the K-means cluster factor.
plot(
  pca_df$PC1, pca_df$PC2,
  col = pca_df$KM_Cluster,
  pch = 16,
  xlab = "PC1",
  ylab = "PC2",
  main = paste0("PCA Projection – K-means Clustering (K=", K, ")")
)
# One legend entry per factor level, numbered 1..n in level order.
legend(
  "topright",
  legend = levels(pca_df$KM_Cluster),
  col = seq_along(levels(pca_df$KM_Cluster)),
  pch = 16,
  bty = "n"
)