# Draw a random subsample of `n` rows from the built-in iris data.
# No seed is set here, so the selected rows differ from run to run.
n <- 50
ii <- sample(seq_len(nrow(iris)), size = n)
d <- iris[ii, ]
# Species of each sampled row (column 5); used later to pick plot colours.
group <- d[["Species"]]
# Label each row as "<species> <position in sample>" so plots show readable names.
rownames(d) <- paste(d[["Species"]], seq_len(nrow(d)))
# Show the sampled rows as an interactive table, five rows per page.
library(DT)
datatable(
  data = d,
  options = list(pageLength = 5)
)
# Colour palette: semi-transparent RGBA colours on a 0-255 scale.
COL <- c(
  rgb(255,   0,   0, 105, maxColorValue = 255), # red
  rgb(  0,   0, 255, 105, maxColorValue = 255), # blue
  rgb(  0, 155,   0, 105, maxColorValue = 255), # green
  rgb(100, 100, 100,  55, maxColorValue = 255)  # grey
)
# Scatter-plot matrix of the numeric columns (column 5, Species, is dropped).
# Plot symbol (16, 17, 18) and colour are both chosen by the species level code.
pairs(d[, -5], pch = 15 + as.numeric(d$Species), col = COL[group],
      lower.panel = NULL, oma = c(3, 3, 5, 3), main = 'Iris Data')
# Legend labels must follow the factor *level* order, because symbols and
# colours above are indexed by level code; unique() would return the species in
# order of appearance in the random sample and could mislabel the entries.
# xpd = TRUE is passed to legend() itself so the legend may be drawn outside the
# plot region without leaving the global par("xpd") changed for later plots.
legend('bottomleft', col = COL[1:3], pch = 16:18,
       legend = levels(d$Species), xpd = TRUE)
library(cluster)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

# AGNES (agglomerative hierarchical clustering).
# Cluster on the numeric measurements only: column 5 (Species) is a factor that
# agnes() would otherwise convert to integer codes and treat as a variable,
# letting the class label influence the distances.
hc.a <- agnes(d[, -5])
# Horizontal dendrogram cut into 3 groups, each framed by a filled rectangle;
# rectangles and branches use the 'jco' palette.
fviz_dend(as.hclust(hc.a), k = 3, horiz = TRUE, rect = TRUE, rect_fill = TRUE,
          color_labels_by_k = FALSE, rect_border = 'jco', k_colors = 'jco',
          cex = 0.4)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the factoextra package.
## Please report the issue at <https://github.com/kassambara/factoextra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

gr <- cutree(hc.a, k = 3) # group number of each observation for k clusters
head(gr)
# Output recorded from an earlier run; the rows are sampled at random, so the
# values differ between runs.
## [1] 1 2 1 1 1 2
# DIANA (divisive hierarchical clustering).
# Cluster on the numeric measurements only: column 5 (Species) is a factor that
# diana() would otherwise convert to integer codes and treat as a variable,
# letting the class label influence the distances.
hc.d <- diana(d[, -5])
# Horizontal dendrogram cut into 3 groups, each framed by a filled rectangle;
# rectangles and branches use the 'jco' palette.
fviz_dend(as.hclust(hc.d), k = 3, horiz = TRUE, rect = TRUE, rect_fill = TRUE,
          color_labels_by_k = FALSE, rect_border = 'jco', k_colors = 'jco',
          cex = 0.4)

library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(cluster)
library(ggdendro)
# Interactive versions of both dendrograms: convert each clustering to a
# dendrogram, draw it rotated with ggdendro, then hand the ggplot to plotly.
hc.a |>
  as.dendrogram() |>
  ggdendrogram(rotate = TRUE) |>
  ggplotly() # AGNES
hc.d |>
  as.dendrogram() |>
  ggdendrogram(rotate = TRUE) |>
  ggplotly() # DIANA
# Compare AGNES linkage methods by their agglomerative coefficient (`$ac`).
METHOD <- c('single', 'complete', 'average',
            'weighted', 'ward', 'gaverage', 'flexible')
# One coefficient per method, named by method. The Species factor (column 5) is
# excluded so that only the numeric measurements enter the distance
# computation. 'flexible' is the only method here given an explicit par.method.
ac <- vapply(METHOD, function(m) {
  fit <- if (m == 'flexible') {
    agnes(d[, -5], method = m, par.method = 0.5)
  } else {
    agnes(d[, -5], method = m)
  }
  fit$ac
}, numeric(1))
# Zoom the y axis to 0.8-1.0; xpd = FALSE clips the bars to the plot region.
barplot(ac, ylim = c(0.8, 1.0), xpd = FALSE)
# Dotted horizontal reference lines every 0.05.
abline(h = seq(0, 1, 0.05), lty = 3)

# Choose the number of clusters for hierarchical clustering (hcut) on the
# numeric columns, using three different criteria.

# Within-cluster sum of squares.
fviz_nbclust(
  d[, -5],
  FUNcluster = hcut,
  method = "wss"
)

# Gap statistic.
fviz_nbclust(
  d[, -5],
  FUNcluster = hcut,
  method = "gap_stat"
)

# Average silhouette width.
fviz_nbclust(
  d[, -5],
  FUNcluster = hcut,
  method = "silhouette"
)

# Assignment ("kadai"): mall customer data.
# Plot palette, defined up front; only its first colour is used in this section.
NGROUPS <- 2
COL <- rainbow(n = NGROUPS)
# Download the CSV and replace its column names with short ones.
d <- read.csv(file = "https://stats.dip.jp/01_ds/data/Mall_Customers.csv")
names(d) <- c("id", "gender", "age", "income", "score")
datatable(data = d, options = list(pageLength = 5))
# Income vs. score scatter plot. The x/y expressions are kept as written because
# matplot() derives its default axis labels from them.
matplot(x = d$income, y = d$score, type = "p", pch = 16, col = COL[1])
grid()

library(cluster)
# Segment customers by hierarchical clustering on income and score only.
N_CLUSTERS <- 5
d_clust <- d[, c('income', 'score')]
hc.a <- agnes(d_clust)
gr <- cutree(as.hclust(hc.a), k = N_CLUSTERS)
# One colour per cluster; fail early if the palette is too short, instead of
# silently plotting points with an NA colour.
COL <- c("red", "green", "blue", "yellow", "purple")
cluster_ids <- sort(unique(gr))
stopifnot(length(COL) >= length(cluster_ids))
# Rows of `d` belonging to each cluster, in cluster-id order.
a <- lapply(cluster_ids, function(id) d[gr == id, ])
# Set up empty axes spanning all customers, then add each cluster in its colour.
matplot(x = d$income, y = d$score, pch = 16, type = 'n', col = COL[1])
for (i in seq_along(a)) {
  matpoints(x = a[[i]]$income, y = a[[i]]$score, pch = 16, type = 'p',
            col = COL[i])
}
grid()
