#
#K-Medoids Clustering in R
#PREPARED BY MOSTADAWI KADER ABDULLAH
homedir <- "C:/Users/Mostafa/Desktop/Thesis"
setwd(homedir)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.1.3
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
#load data
df <- USArrests
#remove rows with missing values
df <- na.omit(df)
#scale each variable to have a mean of 0 and sd of 1
df <- scale(df)
#plot clusters vs within sum of squares
fviz_nbclust(df, pam, method = "wss")

#calculate gap statistic based on number of clusters
gap_stat <- clusGap(df,
FUN = pam,
K.max = 10, #max clusters to consider
B = 50) #total bootstrapped iterations
#plot number of clusters vs. gap statistic
fviz_gap_stat(gap_stat)

#make this example reproducible
set.seed(1)
#perform k-medoids clustering with k = 4 clusters
kmed <- pam(df, k = 4)
#view results
kmed
## Medoids:
## ID Murder Assault UrbanPop Rape
## Alabama 1 1.2425641 0.7828393 -0.5209066 -0.003416473
## Michigan 22 0.9900104 1.0108275 0.5844655 1.480613993
## Oklahoma 36 -0.2727580 -0.2371077 0.1699510 -0.131534211
## New Hampshire 29 -1.3059321 -1.3650491 -0.6590781 -1.252564419
## Clustering vector:
## Alabama Alaska Arizona Arkansas California
## 1 2 2 1 2
## Colorado Connecticut Delaware Florida Georgia
## 2 3 3 2 1
## Hawaii Idaho Illinois Indiana Iowa
## 3 4 2 3 4
## Kansas Kentucky Louisiana Maine Maryland
## 3 3 1 4 2
## Massachusetts Michigan Minnesota Mississippi Missouri
## 3 2 4 1 3
## Montana Nebraska Nevada New Hampshire New Jersey
## 3 3 2 4 3
## New Mexico New York North Carolina North Dakota Ohio
## 2 2 1 4 3
## Oklahoma Oregon Pennsylvania Rhode Island South Carolina
## 3 3 3 3 1
## South Dakota Tennessee Texas Utah Vermont
## 4 1 2 3 4
## Virginia Washington West Virginia Wisconsin Wyoming
## 3 3 4 4 3
## Objective function:
## build swap
## 1.035116 1.027102
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
#plot results of final k-medoids model
fviz_cluster(kmed, data = df)

#add cluster assignment to original data
final_data <- cbind(USArrests, cluster = kmed$cluster)
#view final data
head(final_data)
## Murder Assault UrbanPop Rape cluster
## Alabama 13.2 236 58 21.2 1
## Alaska 10.0 263 48 44.5 2
## Arizona 8.1 294 80 31.0 2
## Arkansas 8.8 190 50 19.5 1
## California 9.0 276 91 40.6 2
## Colorado 7.9 204 78 38.7 2