# Download the mall-customer data set into a data frame.
d <- read.csv(file = "https://stats.dip.jp/01_ds/data/Mall_Customers.csv")

# Replace the original headers with the short names used by the rest of
# the script.
names(d) <- c("id", "gender", "age", "income", "score")

# Show the data as an interactive table, five rows per page.
library(DT)
d |> datatable(options = list(pageLength = 5))
# Number of gender groups and one colour per group.
NGROUPS <- 2
COL <- rainbow(NGROUPS)

# Scatter plot of score against income, every point in the first colour.
matplot(x = d$income, y = d$score, pch = 16, type = "p", col = COL[1],
        main = "Income & Score", xlab = "Income", ylab = "Score")
grid()

# Group index per row: 1 for "Male", 2 for every other value
# (assumed to be "Female" only -- confirm against the data).
group <- ifelse(d$gender == "Male", 1, 2)

# Plotting symbol per group, indexed like COL (1 = Male, 2 = Female).
# These are the symbols the previous expression
# `15 + as.numeric(as.factor(d$gender))` produced for a two-level
# Female/Male factor (Female = 16, Male = 17), so the plot is unchanged.
PCH <- c(17, 16)

# Scatter-plot matrix of the numeric columns, coloured and shaped by gender.
pairs(d[, c("age", "income", "score")], pch = PCH[group], col = COL[group],
      lower.panel = NULL, oma = c(3, 3, 5, 3))

# Allow the legend to be drawn outside the plot region, then restore the
# previous clipping setting so later base-graphics plots are unaffected.
op <- par(xpd = TRUE)
# Colours and symbols come from the same per-group vectors used by pairs(),
# so the legend always matches the plotted points.
legend("bottomleft", col = COL[seq_len(NGROUPS)], pch = PCH,
       legend = c("Male", "Female"))
par(op)

library(cluster)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# Numeric columns used as clustering features.
d_numbers <- d[, c("age", "income", "score")]

# Agglomerative hierarchical clustering with agnes() defaults, drawn as a
# horizontal dendrogram cut into 3 clusters.
hc.a <- agnes(d_numbers)
fviz_dend(as.hclust(hc.a), k = 3, horiz = TRUE, rect = TRUE,
          rect_fill = TRUE, color_labels_by_k = FALSE,
          rect_border = "jco", k_colors = "jco", cex = 0.4)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
##   Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# Cluster membership (1..3) of each row for the agglomerative tree.
gr <- cutree(hc.a, k = 3)
head(gr)
## [1] 1 1 1 1 1 1

# Divisive hierarchical clustering with diana() defaults, drawn the same way.
hc.d <- diana(d_numbers)
fviz_dend(as.hclust(hc.d), k = 3, horiz = TRUE, rect = TRUE,
          rect_fill = TRUE, color_labels_by_k = FALSE,
          rect_border = "jco", k_colors = "jco", cex = 0.4)

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(cluster)
library(ggdendro)

# Interactive, rotated dendrogram of the agglomerative (agnes) tree.
hc.a |>
  as.dendrogram() |>
  ggdendrogram(rotate = TRUE) |>
  ggplotly()

# Interactive, rotated dendrogram of the divisive (diana) tree.
hc.d |>
  as.dendrogram() |>
  ggdendrogram(rotate = TRUE) |>
  ggplotly()
# Linkage methods to compare with agnes().
METHOD <- c("single", "complete", "average", "weighted", "ward", "gaverage",
            "flexible")

# Agglomerative coefficient of d_numbers for one linkage method.
# Only "flexible" is given an explicit par.method (0.5); every other method
# is run with agnes() defaults, exactly as before.
agnes_ac <- function(method) {
  if (method == "flexible") {
    agnes(d_numbers, method = method, par.method = 0.5)$ac
  } else {
    agnes(d_numbers, method = method)$ac
  }
}

# One coefficient per method. Iterating over METHOD itself (instead of the
# hard-coded indices 1:6 and 7) keeps `ac` in sync if METHOD is edited;
# vapply() names the result by METHOD and guarantees a numeric vector.
ac <- vapply(METHOD, agnes_ac, numeric(1))

# Bar chart of the coefficients, zoomed to [0.8, 1.0]; xpd = FALSE clips the
# bars to the plot region so the truncated axis does not draw below it.
barplot(ac, ylim = c(0.8, 1.0), xpd = FALSE, col = "red")
abline(h = seq(0.8, 1.0, 0.05), lty = 3)

# Diagnostics for choosing the number of clusters, using hcut as the
# clustering function on the numeric features.

# Criterion: "wss".
d_numbers |> fviz_nbclust(FUNcluster = hcut, method = "wss")

# Criterion: "gap_stat".
# NOTE(review): no seed is set in this script; if this criterion relies on
# resampling, the plot may differ between runs -- confirm.
d_numbers |> fviz_nbclust(FUNcluster = hcut, method = "gap_stat")

# Criterion: "silhouette".
d_numbers |> fviz_nbclust(FUNcluster = hcut, method = "silhouette")