Clustering Analysis: K-Means dan Hierarchical Clustering

Author

Windi Pangesti

Published

September 21, 2025

Persiapan dan Import data

library(tidyverse)
library(factoextra)
library(DataExplorer)
library(easystats)
library(umap)
library(ggpubr)
library(readxl)
library(purrr)
library(ggplot2)
library(tidyr)
library(corrplot)
library(gridExtra)
library(dplyr)
library(forcats)

# Aggregated data (province-level table)
df <- "Data_clean.xlsx" %>% read_excel()
df %>% glimpse()

Rows: 34
Columns: 5
$ Provinsi              <chr> "ACEH", "BALI", "BANTEN", "BENGKULU", "DI YOGYAK…
$ `Mutu Lulusan`        <dbl> 85.04377, 93.39481, 91.29524, 93.38393, 91.76484…
$ `Proses Pembelajaran` <dbl> 84.45532, 91.85758, 90.38333, 88.51042, 91.96667…
$ `Mutu Guru`           <dbl> 82.08983, 91.89646, 88.26620, 87.03125, 87.13248…
$ `Manajemen S/M`       <dbl> 85.40780, 95.51010, 92.14583, 91.19097, 93.72222…

# Original data, read from the "SMA" sheet of the workbook
df_asli <- read_excel(path = "DATA TUGAS-1_Final.xlsx", sheet = "SMA")
df_asli %>% glimpse()

Rows: 1,144
Columns: 11
$ No                   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15…
$ TAHUN                <dbl> 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2020, 2…
$ NPSN                 <dbl> 20501870, 10303885, 69974471, 50105492, 10600905,…
$ `Status Sekolah`     <chr> "Swasta", "Swasta", "Swasta", "Negeri", "Negeri",…
$ Provinsi             <chr> "JAWA TIMUR", "SUMATERA BARAT", "JAWA BARAT", "BA…
$ `Kab/Kota`           <chr> "KABUPATEN SIDOARJO", "KOTA PAYAKUMBUH", "KABUPAT…
$ Mutu_Lulusan         <dbl> 88.57143, 82.85714, 74.28571, 94.28571, 97.14286,…
$ Proses_Pembelajaran  <dbl> 90.00000, 86.66667, 83.33333, 96.66667, 96.66667,…
$ Mutu_Guru            <dbl> 88.88889, 83.33333, 77.77778, 100.00000, 94.44444…
$ `Manajemen_S/M`      <dbl> 94.44444, 88.88889, 88.88889, 100.00000, 100.0000…
$ Peringkat_Akreditasi <chr> "A", "B", "B", "A", "A", "B", "C", "A", "B", "A",…

Eksplorasi Data

Korelasi data agregat

# Pearson correlation matrix, computed on the numeric columns only
cor_mat <- df %>%
  select(where(is.numeric)) %>%
  cor(method = "pearson")

# Draw the correlation matrix as a heatmap
corrplot(
  cor_mat,
  method = "color",
  type = "upper",          # show the upper triangle only
  addCoef.col = "black",   # print the correlation coefficients
  tl.col = "black",        # label colour
  tl.srt = 45              # rotate the labels by 45 degrees
)

Histogram data agregat

# Overview plot of the data set structure
plot_intro(df, ggtheme = theme_minimal())

# Histograms of the columns, laid out on a 2 x 2 grid
plot_histogram(
  df,
  geom_histogram_args = list(fill = "steelblue", col = "black"),
  nrow = 2,
  ncol = 2,
  ggtheme = theme_minimal()
)

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Boxplot data agregat

# Reshape to long format: one row per (variable, value) pair.
# Provinsi is dropped first because it is only a label column.
df_long <- pivot_longer(
  select(df, -Provinsi),
  cols = everything(),
  names_to = "Variable",
  values_to = "Value"
)

# Exploratory boxplot, one box per variable
ggplot(data = df_long, mapping = aes(x = Variable, y = Value)) +
  geom_boxplot(fill = "steelblue", color = "black", alpha = 0.7) +
  labs(
    title = "Distribusi Variabel (Boxplot Eksplorasi)",
    x = "Variabel",
    y = "Nilai"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Boxplot data asli

# Reshape the original data to long format: one row per (record, indicator)
df_box <- df_asli %>%
  select(Provinsi, Mutu_Lulusan, Mutu_Guru, Proses_Pembelajaran, `Manajemen_S/M`) %>%
  pivot_longer(cols = -Provinsi,
               names_to = "Variable",
               values_to = "Value")

# Mean score per province within each indicator (used as the sort key)
prov_means <- df_box %>%
  group_by(Variable, Provinsi) %>%
  summarise(mean_value = mean(Value, na.rm = TRUE), .groups = "drop")

# A factor has a single, global level order, so reordering `Provinsi` inside
# group_by(Variable) cannot sort every facet on its own. Build one key per
# (province, indicator) pair instead and order those keys by their mean; the
# "___<indicator>" suffix is stripped again when the axis labels are drawn.
df_box <- df_box %>%
  left_join(prov_means, by = c("Variable", "Provinsi")) %>%
  mutate(prov_key = fct_reorder(paste(Provinsi, Variable, sep = "___"),
                                mean_value,
                                .desc = TRUE))

# Plot: scales = "free" lets each facet show only its own (sorted) keys
ggplot(df_box, aes(x = prov_key, y = Value, fill = Provinsi)) +
  geom_boxplot(alpha = 0.7) +
  facet_wrap(~ Variable, scales = "free") +
  scale_x_discrete(labels = function(key) sub("___.*$", "", key)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 6),
        legend.position = "none") +
  labs(title = "Distribusi Variabel per Provinsi",
       x = "Provinsi",
       y = "Skor")   # the boxes show raw scores, not means

Reduksi Dimensi untuk eksplorasi

Standarisasi

# Standardize every indicator column (the Provinsi label is dropped first).
# The result is converted to a plain data.frame because tibbles do not
# support row names: setting them on a tibble raises a deprecation warning,
# while later code reads the province names through rownames(df_std).
df_std <- df %>%
  select(-Provinsi) %>%
  standardize() %>%
  as.data.frame()
rownames(df_std) <- df$Provinsi   # keep the province names as row labels

Warning: Setting row names on a tibble is deprecated.

PCA

# PCA on the standardized data; print the loadings as a data frame
pca0 <- prcomp(x = df_std)
as.data.frame(pca0$rotation)

                          PC1        PC2        PC3        PC4
Mutu Lulusan        0.5016089 -0.1508693 -0.7655252  0.3736284
Proses Pembelajaran 0.5033937  0.1186185 -0.1079213 -0.8490450
Mutu Guru           0.4974482  0.7098739  0.3562985  0.3488205
Manajemen S/M       0.4975224 -0.6776778  0.5247624  0.1335990

# Plot the observations from the PCA result as labelled points
fviz_pca_ind(
  pca0,
  geom.ind = c("point", "text"),
  labelsize = 3,
  repel = TRUE   # keep the labels from overlapping
)

UMAP

# UMAP embedding of the standardized data. random_state pins the seed so the
# layout is the same every time the document is rendered; without it each
# run produces a different picture.
umap0 <- umap(df_std, random_state = 123)

# First two layout columns plus the province label taken from the row names
data_umap <- data.frame(x = umap0$layout[, 1],
                        y = umap0$layout[, 2],
                        Provinsi = rownames(df_std))

# Labelled scatter plot of the embedding
ggscatter(data_umap, x = "x", y = "y",
          label = data_umap$Provinsi,
          repel = TRUE,
          font.label = c(8, "plain", "grey50"))

Menentukan Jumlah Cluster Optimal

K-Means: Elbow & Silhouette

# Elbow method (within-cluster sum of squares) for up to 25 clusters
set.seed(123)
fviz_nbclust(
  df_std,
  FUNcluster = kmeans,
  method = "wss",
  k.max = 25
)

# Silhouette method for up to 25 clusters; the seed is reset first
set.seed(123)
fviz_nbclust(
  df_std,
  FUNcluster = kmeans,
  method = "silhouette",
  k.max = 25
)

Hierarchical Clustering: Dendrogram dan Silhouette

# Linkage methods to compare
metode_agg <- c("single","complete","average","ward.D","ward.D2","median","centroid","mcquitty")

# Dendrogram for each linkage method
walk(metode_agg, ~ {
  hc_res0 <- hcut(x = df_std, hc_method = .x, hc_func = "hclust")
  # hcut() already cuts the tree into its default number of clusters and
  # fviz_dend() wants one branch colour per cluster. Supplying exactly
  # hc_res0$nbclust colours keeps the tree all black without the
  # "color vector was recycled" warning that a single colour triggers.
  print(fviz_dend(hc_res0,
                  type = "rectangle",
                  k_colors = rep("black", hc_res0$nbclust),
                  main = str_c("Dendrogram of HC with ", .x, " linkage"),
                  cex = 0.5))
})

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
of ggplot2 3.3.4.
ℹ The deprecated feature was likely used in the factoextra package.
  Please report the issue at <https://github.com/kassambara/factoextra/issues>.

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

# Silhouette plot for choosing the number of clusters, one per linkage method
walk(metode_agg, function(linkage) {
  sil_plot <- fviz_nbclust(
    df_std,
    FUNcluster = hcut,
    method = "silhouette",
    hc_method = linkage
  )
  plot_title <- paste("Silhouette Optimal Cluster -", linkage, "Linkage")
  print(sil_plot + ggtitle(plot_title))
})

# Menyimpan k optimal
library(factoextra)
library(tidyverse)
library(purrr)

#' Find the silhouette-optimal number of clusters for one linkage method
#'
#' Builds the silhouette plot with fviz_nbclust() (hierarchical clustering via
#' hcut) and reads the plotted points back out of its first layer.
#'
#' @param X Data passed on to fviz_nbclust().
#' @param linkage Linkage method name, forwarded as `hc_method`.
#' @return A one-row tibble with columns `linkage`, `k_opt` (integer) and
#'   `sil_max` (the largest silhouette value found).
get_kopt_from_fviz <- function(X, linkage) {
  p <- fviz_nbclust(X, FUNcluster = hcut, method = "silhouette", hc_method = linkage)
  dd <- distinct(layer_data(p, 1), x, y)   # x = k, y = silhouette
  # layer_data() returns x as a ggplot2 position vector (it prints as
  # <mppd_dsc> in a tibble); coerce it so k_opt is a plain integer that can
  # be stored, printed and handed to cutree()/fviz_dend() without surprises.
  k_opt  <- as.integer(dd$x[which.max(dd$y)])
  silmax <- max(dd$y, na.rm = TRUE)
  tibble(linkage = linkage, k_opt = k_opt, sil_max = silmax)
}

# One row per linkage method, best silhouette first
kopt_tbl_fviz <- metode_agg %>%
  map(function(linkage) get_kopt_from_fviz(df_std, linkage)) %>%
  bind_rows() %>%
  arrange(desc(sil_max))

print(kopt_tbl_fviz)

# A tibble: 8 × 3
  linkage  k_opt      sil_max
  <chr>    <mppd_dsc>   <dbl>
1 average  2            0.565
2 ward.D   2            0.565
3 ward.D2  2            0.565
4 centroid 2            0.565
5 mcquitty 2            0.565
6 single   2            0.541
7 complete 2            0.455
8 median   4            0.277

# List of hierarchical clustering results, one entry per linkage method,
# each cut at the optimal k stored for that method
hc_all <- map2(
  kopt_tbl_fviz$linkage,
  kopt_tbl_fviz$k_opt,
  function(meth, k) {
    tree <- hclust(dist(df_std), method = meth)
    list(
      linkage = meth,
      k_opt = k,
      hc = tree,
      clusters = cutree(tree, k = k)
    )
  }
) %>%
  set_names(kopt_tbl_fviz$linkage)

Clustering

Hierarchical Clustering

# Linkage methods to run
metode_agg <- c(
  "single", "complete", "average", "ward.D",
  "ward.D2", "median", "centroid", "mcquitty"
)

# One dendrogram per linkage method, cut at that method's optimal k
for (meth in metode_agg) {
  # Optimal k stored for this linkage
  k_opt <- kopt_tbl_fviz[["k_opt"]][kopt_tbl_fviz[["linkage"]] == meth]

  # Fit the tree and cut it into k_opt clusters
  hc_res <- hclust(dist(df_std), method = meth)
  clusters_hc <- cutree(hc_res, k = k_opt)

  # Dendrogram with a rectangle around each cluster
  p <- fviz_dend(
    hc_res,
    k = k_opt,
    cex = 0.5,
    show_labels = TRUE,
    rect = TRUE
  )
  p <- p + ggtitle(paste0("HC - ", meth, " (k = ", k_opt, ")"))

  print(p)   # plots are not auto-printed inside a loop
}

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

Warning in data.frame(xmin = unlist(xleft), ymin = unlist(ybottom), xmax =
unlist(xright), : row names were found from a short variable and have been
discarded

Warning in get_col(col, k): Length of color vector was shorter than the number
of clusters - color vector was recycled

K-Means dengan jumlah cluster optimal

# Number of clusters; change this to match the elbow / silhouette results
k_opt <- 2
set.seed(123)
kmean_res <- kmeans(x = df_std, centers = k_opt, iter.max = 100)

# Cluster centres (on the standardized scale)
kmean_res[["centers"]]

  Mutu Lulusan Proses Pembelajaran  Mutu Guru Manajemen S/M
1   -1.1927301          -1.2067627 -1.2214300    -1.2190815
2    0.4969709           0.5028178  0.5089292     0.5079506

Visualisasi Hasil klaster

# Cluster plot of the K-Means partition
plot_kmeans <- fviz_cluster(
  object = list(data = df_std, cluster = kmean_res$cluster),
  geom.ind = c("point", "text"),
  repel = TRUE,
  show.clust.cent = TRUE,
  ellipse.alpha = 0.1,
  labelsize = 6
)
plot_kmeans <- plot_kmeans +
  theme_minimal() +
  ggtitle("K-Means Clustering")
print(plot_kmeans)

# Cluster plot of every hierarchical solution at its optimal k
metode_agg <- kopt_tbl_fviz$linkage

walk(metode_agg, function(meth) {
  k_opt <- kopt_tbl_fviz$k_opt[kopt_tbl_fviz$linkage == meth]

  # Fit the tree and cut it into k_opt clusters
  clusters_hc <- cutree(hclust(dist(df_std), method = meth), k = k_opt)

  plot_hc <- fviz_cluster(
    list(data = df_std, cluster = clusters_hc),
    geom.ind = c("point", "text"),
    repel = TRUE,
    show.clust.cent = TRUE,
    ellipse.alpha = 0.1,
    labelsize = 6
  )
  plot_hc <- plot_hc +
    theme_minimal() +
    ggtitle(paste0("Hierarchical Clustering – ", meth, " (k = ", k_opt, ")"))

  print(plot_hc)
})

Simpan Hasil Klaster ke Data Asli

# K-Means labels
df[["cluster_kmeans"]] <- kmean_res$cluster

# Add one label column per hierarchical linkage method
metode_agg <- kopt_tbl_fviz$linkage

for (meth in metode_agg) {
  # optimal number of clusters for this linkage
  k_opt <- kopt_tbl_fviz[["k_opt"]][kopt_tbl_fviz[["linkage"]] == meth]
  # hierarchical clustering tree
  hc <- hclust(dist(df_std), method = meth)
  # store the cut as a new column of df
  df[[paste("cluster_hc_", meth, sep = "")]] <- cutree(hc, k = k_opt)
}

# Print a summary of the cluster sizes, starting with the K-Means header
cat("\n== Ukuran cluster K-Means ==\n", sep = "")


== Ukuran cluster K-Means ==

# Number of rows assigned to each K-Means cluster
print(table(df[["cluster_kmeans"]]))


 1  2 
10 24

# Cluster sizes for every hierarchical linkage method
walk(metode_agg, function(linkage) {
  cat("\n== Ukuran cluster HC -", linkage, "==\n")
  print(table(df[[paste0("cluster_hc_", linkage)]]))
})


== Ukuran cluster HC - average ==

 1  2 
 7 27 

== Ukuran cluster HC - ward.D ==

 1  2 
 7 27 

== Ukuran cluster HC - ward.D2 ==

 1  2 
 7 27 

== Ukuran cluster HC - centroid ==

 1  2 
 7 27 

== Ukuran cluster HC - mcquitty ==

 1  2 
 7 27 

== Ukuran cluster HC - single ==

 1  2 
31  3 

== Ukuran cluster HC - complete ==

 1  2 
14 20 

== Ukuran cluster HC - median ==

 1  2  3  4 
 7  1 23  3

# Cross-tabulate the K-Means labels against each hierarchical solution
walk(metode_agg, function(linkage) {
  hc_labels <- df[[paste0("cluster_hc_", linkage)]]
  cat("\n== Crosstab KMeans vs HC -", linkage, "==\n")
  print(table(KMeans = df$cluster_kmeans, HC = hc_labels))
})


== Crosstab KMeans vs HC - average ==
      HC
KMeans  1  2
     1  7  3
     2  0 24

== Crosstab KMeans vs HC - ward.D ==
      HC
KMeans  1  2
     1  7  3
     2  0 24

== Crosstab KMeans vs HC - ward.D2 ==
      HC
KMeans  1  2
     1  7  3
     2  0 24

== Crosstab KMeans vs HC - centroid ==
      HC
KMeans  1  2
     1  7  3
     2  0 24

== Crosstab KMeans vs HC - mcquitty ==
      HC
KMeans  1  2
     1  7  3
     2  0 24

== Crosstab KMeans vs HC - single ==
      HC
KMeans  1  2
     1  7  3
     2 24  0

== Crosstab KMeans vs HC - complete ==
      HC
KMeans  1  2
     1 10  0
     2  4 20

== Crosstab KMeans vs HC - median ==
      HC
KMeans  1  2  3  4
     1  7  0  0  3
     2  0  1 23  0

# Show the first rows of df, now including the cluster columns
head(df, n = 6)

# A tibble: 6 × 14
  Provinsi      `Mutu Lulusan` `Proses Pembelajaran` `Mutu Guru` `Manajemen S/M`
  <chr>                  <dbl>                 <dbl>       <dbl>           <dbl>
1 ACEH                    85.0                  84.5        82.1            85.4
2 BALI                    93.4                  91.9        91.9            95.5
3 BANTEN                  91.3                  90.4        88.3            92.1
4 BENGKULU                93.4                  88.5        87.0            91.2
5 DI YOGYAKARTA           91.8                  92.0        87.1            93.7
6 DKI JAKARTA             94.5                  91.1        88.8            92.7
# ℹ 9 more variables: cluster_kmeans <int>, cluster_hc_average <int>,
#   cluster_hc_ward.D <int>, cluster_hc_ward.D2 <int>,
#   cluster_hc_centroid <int>, cluster_hc_mcquitty <int>,
#   cluster_hc_single <int>, cluster_hc_complete <int>, cluster_hc_median <int>

Interpretasi

# --- K-Means ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_kmeans) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_kmeans `mean_Mutu Lulusan` `mean_Proses Pembelajaran` `mean_Mutu Guru`
           <int>               <dbl>                      <dbl>            <dbl>
1              1                85.1                       83.9             81.0
2              2                90.5                       89.0             87.3
# ℹ 9 more variables: `mean_Manajemen S/M` <dbl>,
#   mean_cluster_hc_average <dbl>, mean_cluster_hc_ward.D <dbl>,
#   mean_cluster_hc_ward.D2 <dbl>, mean_cluster_hc_centroid <dbl>,
#   mean_cluster_hc_mcquitty <dbl>, mean_cluster_hc_single <dbl>,
#   mean_cluster_hc_complete <dbl>, mean_cluster_hc_median <dbl>

# --- HC: Single linkage ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_single) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_hc_single `mean_Mutu Lulusan` mean_Proses Pembelaja…¹ `mean_Mutu Guru`
              <int>               <dbl>                   <dbl>            <dbl>
1                 1                89.6                    88.1             86.2
2                 2                82.5                    81.3             77.5
# ℹ abbreviated name: ¹`mean_Proses Pembelajaran`
# ℹ 9 more variables: `mean_Manajemen S/M` <dbl>, mean_cluster_kmeans <dbl>,
#   mean_cluster_hc_average <dbl>, mean_cluster_hc_ward.D <dbl>,
#   mean_cluster_hc_ward.D2 <dbl>, mean_cluster_hc_centroid <dbl>,
#   mean_cluster_hc_mcquitty <dbl>, mean_cluster_hc_complete <dbl>,
#   mean_cluster_hc_median <dbl>

# --- HC: Complete linkage ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_complete) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_hc_complete `mean_Mutu Lulusan` `mean_Proses Pembelajaran`
                <int>               <dbl>                      <dbl>
1                   1                85.9                       84.9
2                   2                91.1                       89.3
# ℹ 10 more variables: `mean_Mutu Guru` <dbl>, `mean_Manajemen S/M` <dbl>,
#   mean_cluster_kmeans <dbl>, mean_cluster_hc_average <dbl>,
#   mean_cluster_hc_ward.D <dbl>, mean_cluster_hc_ward.D2 <dbl>,
#   mean_cluster_hc_centroid <dbl>, mean_cluster_hc_mcquitty <dbl>,
#   mean_cluster_hc_single <dbl>, mean_cluster_hc_median <dbl>

# --- HC: Average linkage ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_average) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_hc_average `mean_Mutu Lulusan` mean_Proses Pembelaj…¹ `mean_Mutu Guru`
               <int>               <dbl>                  <dbl>            <dbl>
1                  1                84.4                   82.9             79.9
2                  2                90.1                   88.7             86.9
# ℹ abbreviated name: ¹`mean_Proses Pembelajaran`
# ℹ 9 more variables: `mean_Manajemen S/M` <dbl>, mean_cluster_kmeans <dbl>,
#   mean_cluster_hc_ward.D <dbl>, mean_cluster_hc_ward.D2 <dbl>,
#   mean_cluster_hc_centroid <dbl>, mean_cluster_hc_mcquitty <dbl>,
#   mean_cluster_hc_single <dbl>, mean_cluster_hc_complete <dbl>,
#   mean_cluster_hc_median <dbl>

# --- HC: Ward.D ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_ward.D) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_hc_ward.D `mean_Mutu Lulusan` mean_Proses Pembelaja…¹ `mean_Mutu Guru`
              <int>               <dbl>                   <dbl>            <dbl>
1                 1                84.4                    82.9             79.9
2                 2                90.1                    88.7             86.9
# ℹ abbreviated name: ¹`mean_Proses Pembelajaran`
# ℹ 9 more variables: `mean_Manajemen S/M` <dbl>, mean_cluster_kmeans <dbl>,
#   mean_cluster_hc_average <dbl>, mean_cluster_hc_ward.D2 <dbl>,
#   mean_cluster_hc_centroid <dbl>, mean_cluster_hc_mcquitty <dbl>,
#   mean_cluster_hc_single <dbl>, mean_cluster_hc_complete <dbl>,
#   mean_cluster_hc_median <dbl>

# --- HC: Ward.D2 ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_ward.D2) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_hc_ward.D2 `mean_Mutu Lulusan` mean_Proses Pembelaj…¹ `mean_Mutu Guru`
               <int>               <dbl>                  <dbl>            <dbl>
1                  1                84.4                   82.9             79.9
2                  2                90.1                   88.7             86.9
# ℹ abbreviated name: ¹`mean_Proses Pembelajaran`
# ℹ 9 more variables: `mean_Manajemen S/M` <dbl>, mean_cluster_kmeans <dbl>,
#   mean_cluster_hc_average <dbl>, mean_cluster_hc_ward.D <dbl>,
#   mean_cluster_hc_centroid <dbl>, mean_cluster_hc_mcquitty <dbl>,
#   mean_cluster_hc_single <dbl>, mean_cluster_hc_complete <dbl>,
#   mean_cluster_hc_median <dbl>

# --- HC: Median linkage ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_median) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 4 × 13
  cluster_hc_median `mean_Mutu Lulusan` mean_Proses Pembelaja…¹ `mean_Mutu Guru`
              <int>               <dbl>                   <dbl>            <dbl>
1                 1                86.2                    85.0             82.6
2                 2                93.4                    91.9             91.9
3                 3                90.4                    88.8             87.1
4                 4                82.5                    81.3             77.5
# ℹ abbreviated name: ¹`mean_Proses Pembelajaran`
# ℹ 9 more variables: `mean_Manajemen S/M` <dbl>, mean_cluster_kmeans <dbl>,
#   mean_cluster_hc_average <dbl>, mean_cluster_hc_ward.D <dbl>,
#   mean_cluster_hc_ward.D2 <dbl>, mean_cluster_hc_centroid <dbl>,
#   mean_cluster_hc_mcquitty <dbl>, mean_cluster_hc_single <dbl>,
#   mean_cluster_hc_complete <dbl>

# --- HC: Centroid linkage ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_centroid) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_hc_centroid `mean_Mutu Lulusan` `mean_Proses Pembelajaran`
                <int>               <dbl>                      <dbl>
1                   1                84.4                       82.9
2                   2                90.1                       88.7
# ℹ 10 more variables: `mean_Mutu Guru` <dbl>, `mean_Manajemen S/M` <dbl>,
#   mean_cluster_kmeans <dbl>, mean_cluster_hc_average <dbl>,
#   mean_cluster_hc_ward.D <dbl>, mean_cluster_hc_ward.D2 <dbl>,
#   mean_cluster_hc_mcquitty <dbl>, mean_cluster_hc_single <dbl>,
#   mean_cluster_hc_complete <dbl>, mean_cluster_hc_median <dbl>

# --- HC: Mcquitty linkage ---
# Mean of each indicator per cluster. The other cluster_* label columns are
# numeric too, so they are excluded explicitly: averaging labels is meaningless.
df %>%
  group_by(cluster_hc_mcquitty) %>%
  summarise(across(where(is.numeric) & !starts_with("cluster_"), mean,
                   .names = "mean_{.col}"))

# A tibble: 2 × 13
  cluster_hc_mcquitty `mean_Mutu Lulusan` `mean_Proses Pembelajaran`
                <int>               <dbl>                      <dbl>
1                   1                84.4                       82.9
2                   2                90.1                       88.7
# ℹ 10 more variables: `mean_Mutu Guru` <dbl>, `mean_Manajemen S/M` <dbl>,
#   mean_cluster_kmeans <dbl>, mean_cluster_hc_average <dbl>,
#   mean_cluster_hc_ward.D <dbl>, mean_cluster_hc_ward.D2 <dbl>,
#   mean_cluster_hc_centroid <dbl>, mean_cluster_hc_single <dbl>,
#   mean_cluster_hc_complete <dbl>, mean_cluster_hc_median <dbl>