# Load necessary libraries
library(readr)
library(ggplot2)
library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(dendextend)
##
## ---------------------
## Welcome to dendextend version 1.19.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
##
## cutree
# Column labels for the eight fields of the excerpt; the last entry is the
# outcome label.
feature_name <- c(
  "Age", "CPK_enzyme", "Ejection_fraction", "Platelets",
  "Serum_creatinine", "Serum_sodium", "Time", "DEATH_EVENT"
)
# Load the CSV without treating its first row as a header, then attach the
# labels defined above.
HeartFailure_excerpt <- read.csv(
  file = "C:/Users/15123/Downloads/HeartFailure_excerpt.csv",
  header = FALSE
)
names(HeartFailure_excerpt) <- feature_name
# Clustering is unsupervised, so leave out the DEATH_EVENT label column
# (position 8 in feature_name).
heart_data <- HeartFailure_excerpt[, -match("DEATH_EVENT", feature_name)]
# Standardise each remaining column (centre and scale, i.e. scale() defaults).
scaled_data <- scale(heart_data, center = TRUE, scale = TRUE)
# Pairwise Euclidean distances between the rows of the scaled data
dist_mat <- stats::dist(x = scaled_data, method = "euclidean")
# Agglomerative clustering on those distances using average linkage
hclust_avg <- stats::hclust(d = dist_mat, method = "average")
# Turn the tree into a dendrogram, colour its branches by the clusters
# obtained when cutting at height 3, and draw the result
avg_dend_obj <- stats::as.dendrogram(object = hclust_avg)
avg_col_dend <- dendextend::color_branches(dend = avg_dend_obj, h = 3)
plot(x = avg_col_dend)
# K-means clustering ----
# Fix the RNG seed so the random initial centres, and therefore the cluster
# assignment, are reproducible.
set.seed(1234)
kmeans_results <- kmeans(scaled_data, centers = 3, iter.max = 200)

# Scatter plot of the first two scaled features, coloured by k-means cluster.
# scale() returns a matrix, so build an explicit data frame for ggplot()
# instead of passing the matrix and indexing it inside aes(); this also gives
# readable axis and legend titles.
kmeans_axis_names <- colnames(scaled_data)[1:2]
kmeans_plot_data <- data.frame(
  x = scaled_data[, 1],
  y = scaled_data[, 2],
  cluster = factor(kmeans_results$cluster)
)
ggplot(kmeans_plot_data, aes(x = x, y = y, color = cluster)) +
  geom_point() +
  labs(
    title = "K-means Clustering",
    x = kmeans_axis_names[1],
    y = kmeans_axis_names[2],
    color = "Cluster"
  )

# Silhouette widths for the k-means partition. dist_mat is the Euclidean
# distance matrix of scaled_data computed earlier in this script, so it is
# reused here rather than recomputed.
s <- silhouette(kmeans_results$cluster, dist_mat)
plot(s)
# Redraw the average-linkage tree, box the clusters obtained by cutting it
# into k = 3 groups, and mark height 3 with a red horizontal line
plot(x = hclust_avg)
stats::rect.hclust(tree = hclust_avg, k = 3, border = 2:6)
graphics::abline(h = 3, col = "red")
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk from the R Markdown template: summarise the `cars` data set
# (not defined in this file; presumably the built-in datasets::cars -- confirm).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.