# PAC2: Dendrogram ----
# Load libraries
library(knitr)
library(ggplot2)
library(ggdendro)
library(dendextend)
library(readr)
# Load the dataset
hdi_data <- read_csv("human-development-index.csv")

# Keep only 2022 rows that have an HDI value. A missing value would yield NA
# distances, which hclust() rejects, so incomplete rows are dropped up front.
hdi_2022 <- subset(
  hdi_data,
  Year == 2022 & !is.na(`Human Development Index`)
)

# The branches are coloured by cutting the tree into 5 clusters below, which
# needs at least 5 observations; fail early with a readable message otherwise.
if (nrow(hdi_2022) < 5) {
  stop(
    "Need at least 5 countries with a 2022 HDI value to colour 5 clusters.",
    call. = FALSE
  )
}

# Standardize HDI values for clustering
hdi_scaled <- scale(hdi_2022$`Human Development Index`)

# Attach the country names as row names so that dist()/hclust() carry the
# correct label for every observation. Assigning hdi_2022$Entity to the
# dendrogram afterwards would be wrong: dendrogram labels are set in leaf
# (left-to-right) order, not in the row order of the data.
rownames(hdi_scaled) <- hdi_2022$Entity

# Perform hierarchical clustering
d <- dist(hdi_scaled) # pairwise Euclidean distances between countries
hc <- hclust(d, method = "ward.D2") # Ward's minimum-variance linkage

# Convert the hclust object to a dendrogram (labels come along from hc)
dend <- as.dendrogram(hc)

# Use dendextend to customize the dendrogram's appearance
dend <- dend %>%
  set("labels_cex", 0.7) %>%
  set("branches_k_color", k = 5) %>%
  set("branches_lwd", 0.6) %>%
  set("labels_colors", value = "blue")

# Plot the dendrogram
plot(dend, main = "Dendrogram of Countries Based on HDI (2022)", horiz = TRUE)
# Convert hclust object to dendrogram data for ggplot
dend_data <- ggdendro::dendro_data(as.dendrogram(hc), type = "rectangle")

# Label every leaf with its country name. hc$order gives, for each leaf
# position (left to right), the row of hdi_2022 that sits there, so this
# mapping is correct whether or not hc already carries labels.
# NOTE(review): relies on dend_data$labels$x being the leaf positions 1..n,
# which is how ggdendro lays out leaves — confirm if ggdendro is upgraded.
dend_data$labels$label <- hdi_2022$Entity[hc$order[dend_data$labels$x]]

# Plot using ggplot2. The x aesthetic is the leaf position and the y aesthetic
# is the merge height, so "Distance" belongs to y (shown horizontally after
# coord_flip()) and the leaf axis gets no title.
ggplot() +
  geom_segment(
    data = dend_data$segments,
    aes(x = x, y = y, xend = xend, yend = yend)
  ) +
  geom_text(
    data = dend_data$labels,
    aes(x = x, y = y, label = label),
    hjust = 1,
    size = 3
  ) +
  coord_flip() +
  labs(
    title = "Dendrogram of Countries Based on HDI (2022)",
    x = "",
    y = "Distance"
  ) +
  theme_minimal()