# Read the mall-customer CSV from its URL and replace the header with short
# column names.
d <- setNames(
  read.csv("https://stats.dip.jp/01_ds/data/Mall_Customers.csv"),
  c("id", "gender", "age", "income", "score")
)

library(DT)
# Show the table with DT::datatable(), five rows per page.
datatable(d, options = list(pageLength = 5))
# Number of groups; also sets the size of the color palette below.
NGROUPS <- 2
# Color palette: one rainbow color per group.
COL <- rainbow(NGROUPS)

# Scatter plot of score against income, every point in the first palette
# color, with a background grid.
matplot(
  x = d$income,
  y = d$score,
  type = "p",
  pch = 16,
  col = COL[1]
)
grid()

# Seed the RNG so the random starts used by kmeans() give the same result
# (including the cluster numbering) on every run.
set.seed(123)
# k-means with NGROUPS centers on age, income and score; 25 random starts.
km <- kmeans(d[, c("age", "income", "score")], centers = NGROUPS, nstart = 25)

# Rows of `d` belonging to each cluster. The list is called `groups` instead
# of `c` so that the base function c() is not masked by a data object.
groups <- vector("list", NGROUPS)
# Most frequent gender value within each cluster (filled in by the loop).
name.group <- rep(NA_character_, NGROUPS)

# Empty frame spanning the full income/score range; the points are added
# cluster by cluster below.
matplot(x = d$income, y = d$score, type = "n",
        xlab = "Income", ylab = "Score", main = "Mall Customers Clustering")
grid()

# seq_len() keeps the loop empty (instead of iterating over 1, 0) if NGROUPS
# were ever 0.
for (i in seq_len(NGROUPS)) {
  groups[[i]] <- d[km$cluster == i, ]

  # Points of cluster i in the i-th palette color.
  matpoints(x = groups[[i]]$income,
            y = groups[[i]]$score,
            pch = 16,
            col = COL[i])

  # Customer ids in gray, drawn one unit above each point.
  text(x = groups[[i]]$income,
       y = groups[[i]]$score + 1,
       labels = groups[[i]]$id,
       col = gray(0.5), cex = 0.7)

  name.group[i] <- names(which.max(table(groups[[i]]$gender)))
}

legend("topright", pch = 16, col = COL[seq_len(NGROUPS)],
       title = "Cluster", legend = paste("Cluster", seq_len(NGROUPS)))

library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Draw the k-means result with factoextra, handing it only the income and
# score columns of `d`.
fviz_cluster(km, data = d[c("income", "score")])

library(cluster)
# Partitioning around medoids with NGROUPS clusters on the numeric features
# age, income and score. The `id` column is only a row identifier and
# `gender` is not numeric, so neither is passed to the distance computation.
# NOTE(review): feature set chosen to match the k-means fit; confirm intent.
pm <- pam(d[, c("age", "income", "score")], k = NGROUPS)
plot(pm)

# CLARA with NGROUPS clusters on income and score: pamLike enabled, a single
# sample. TRUE/FALSE are spelled out because T and F are ordinary variables
# that can be reassigned.
cl <- clara(d[, c("income", "score")], k = NGROUPS,
            pamLike = TRUE, samples = 1)
plot(cl)

# Same data with pamLike disabled and 50 samples, for comparison.
cl2 <- clara(d[, c("income", "score")], k = NGROUPS,
             pamLike = FALSE, samples = 50)
plot(cl2)

# Recode gender as a 0/1 indicator: 1 where the value is "Male", 0 otherwise.
d[["gender"]] <- ifelse(d[["gender"]] == "Male", 1, 0)

# Feature table used for the second clustering run.
data_for_clustering <- d[c("age", "income", "score")]

# Switch to three groups and fit k-means with a fixed seed and 25 starts.
NGROUPS <- 3
set.seed(123)
km <- kmeans(
  data_for_clustering,
  centers = NGROUPS,
  nstart = 25
)

# Store each row's cluster assignment in `d` as a factor.
d[["cluster"]] <- as.factor(km$cluster)

# Plot the three-group k-means result on income and score. Only these two
# columns are passed so that the axes really are the variables named in
# xlab/ylab; with all three feature columns fviz_cluster() would plot
# principal components instead, and the "Income"/"Score" labels would be
# wrong.
fviz_cluster(km,
             data = data_for_clustering[, c("income", "score")],
             geom = "point",
             ellipse.type = "convex",
             main = "K-means Clustering of Mall Customers",
             xlab = "Income",
             ylab = "Score",
             palette = rainbow(NGROUPS))