# Read the customer table from the remote CSV and give its columns short names.
# NOTE(review): the rename assumes the file has exactly five columns in this
# order -- confirm against the CSV.
d <- read.csv(file = "https://stats.dip.jp/01_ds/data/Mall_Customers.csv")
names(d) <- c("id", "gender", "age", "income", "score")

# Interactive preview of the table, five rows per page.
library(DT)
datatable(data = d, options = list(pageLength = 5))
# Unclustered scatter of score against income, drawn in the first colour of a
# two-colour rainbow palette.
NGROUPS <- 2
COL <- rainbow(n = NGROUPS)

# The x/y arguments are passed as the same expressions as before so that any
# default axis titles derived from them stay unchanged.
matplot(
  x = d$income,
  y = d$score,
  type = "p",
  pch = 16,
  col = COL[1]
)
grid()

# K-means on income and score with five clusters, drawn with base graphics:
# one colour per cluster plus a legend.
NGROUPS <- 5
COL <- rainbow(NGROUPS)

# kmeans() starts from random centres. Fix the seed and try several starts so
# the partition -- and therefore which colour/legend entry each group gets --
# is the same on every run.
set.seed(123)
km <- kmeans(d[, c("income", "score")], centers = NGROUPS, nstart = 25)

# Per-cluster subsets of d. Named `groups` rather than `c` so that the base
# function c() is not masked by a data object.
groups <- vector("list", NGROUPS)

# Draw an empty frame first (type = "n"); points are added cluster by cluster.
matplot(x = d$income, y = d$score, type = "n")
grid()
for (i in seq_len(NGROUPS)) {
  groups[[i]] <- d[km$cluster == i, ]
  matpoints(
    x = groups[[i]]$income,
    y = groups[[i]]$score,
    pch = 16,
    col = COL[i]
  )
}
legend(
  "topright",
  pch = 16,
  bg = "white",
  col = COL[seq_len(NGROUPS)],
  legend = paste0("Group", seq_len(NGROUPS))
)
library(factoextra)

# factoextra view of the k-means fit, using the same two columns (income and
# score) that the model was fitted on.
fviz_cluster(
  object = km,
  data = d[c("income", "score")]
)

library(cluster)
# Partitioning around medoids with NGROUPS clusters.
# Cluster on income and score, like the k-means and CLARA fits in this script.
# The previous selection d[, -5] removed only the `score` column and left the
# `id` row identifier and the character `gender` column in the dissimilarity
# computation.
pm <- pam(d[, c("income", "score")], k = NGROUPS)
plot(pm)


# CLARA on income and score with NGROUPS clusters, a single sub-sample and the
# PAM-like swap phase enabled.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
cl <- clara(d[, c("income", "score")], k = NGROUPS, pamLike = TRUE, samples = 1)
plot(cl)


# CLARA on income and score with NGROUPS clusters, 50 sub-samples and the
# PAM-like swap phase disabled.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
cl2 <- clara(d[, c("income", "score")], k = NGROUPS, pamLike = FALSE, samples = 50)
plot(cl2)


# Recode gender as a 0/1 indicator (1 for "Male", 0 for any other value).
d[["gender"]] <- as.numeric(d[["gender"]] == "Male")

# Features used for the three-cluster k-means fit.
data_for_clustering <- d[c("age", "income", "score")]

# Seeded k-means with 25 random starts; the cluster label of each row is
# stored back on d as a factor.
NGROUPS <- 3
set.seed(123)
km <- kmeans(x = data_for_clustering, centers = NGROUPS, nstart = 25)
d[["cluster"]] <- as.factor(km[["cluster"]])
# Plot the k-means partition on the income/score plane.
# `data_for_clustering` has three columns (age, income, score). Given more
# than two variables, fviz_cluster() plots the first two principal components,
# so the "Income"/"Score" axis titles would not describe what is drawn.
# `choose.vars` restricts the plot to the two named variables, and
# `stand = FALSE` keeps them in their original units instead of z-scores.
fviz_cluster(
  km,
  data = data_for_clustering,
  choose.vars = c("income", "score"),
  stand = FALSE,
  geom = "point",
  ellipse.type = "convex",
  main = "K-means Clustering of Mall Customers",
  xlab = "Income",
  ylab = "Score",
  palette = rainbow(NGROUPS)
)
