library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(cluster)
## Warning: package 'cluster' was built under R version 4.5.3
library(flexclust)
## Warning: package 'flexclust' was built under R version 4.5.3
library(dbscan)
## Warning: package 'dbscan' was built under R version 4.5.3
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(meanShiftR)
library(e1071)
## Warning: package 'e1071' was built under R version 4.5.3
##
## Attaching package: 'e1071'
## The following object is masked from 'package:flexclust':
##
## bclust
library(fpc)
## Warning: package 'fpc' was built under R version 4.5.3
##
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
##
## dbscan
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:e1071':
##
## element
Load & Preprocessing
# Read the raw time series and keep only the rows recorded at province level.
df <- read.csv("covid_19_indonesia_time_series_all.csv") %>%
  filter(Location.Level == "Province")

# Collapse to one row per Location: the mean of each case/death column,
# ignoring missing values.
df_final <- summarise(
  group_by(df, Location),
  cases = mean(New.Cases, na.rm = TRUE),
  deaths = mean(New.Deaths, na.rm = TRUE),
  total_cases = mean(Total.Cases, na.rm = TRUE),
  total_deaths = mean(Total.Deaths, na.rm = TRUE)
)

# Centre and scale every summary column; Location (the first column) is an
# identifier, so it is dropped before scaling.
df_scaled <- scale(dplyr::select(df_final, -Location))
Elbow Method
# Total within-cluster sum of squares for k = 1..10, used to pick k by eye.
# kmeans() draws random starting centres, so the seed is fixed to make the
# curve reproducible from run to run.
set.seed(123)
k_grid <- 1:10
# vapply() guarantees a plain numeric vector (one value per candidate k).
wss <- vapply(k_grid, function(n_clusters) {
  kmeans(df_scaled, centers = n_clusters, nstart = 20)$tot.withinss
}, numeric(1))
plot(
  k_grid, wss,
  type = "b",
  xlab = "Number of clusters (k)",
  ylab = "Total within-cluster sum of squares"
)

# Number of clusters used by the centre-based methods below.
k <- 3
Clustering Models
# K-means, K-medians and fuzzy C-means all start from randomly chosen centres.
# Fixing the seed keeps the partitions (and every score derived from them)
# identical between runs.
set.seed(123)

# K-means: best of 25 random starts.
km <- kmeans(df_scaled, centers = k, nstart = 25)
# K-medians via flexclust's kcca() with the "kmedians" family.
kmed <- kcca(df_scaled, k = k, family = kccaFamily("kmedians"))
## Found more than one class "kcca" in cache; using the first, from namespace 'flexclust'
## Also defined by 'kernlab'
## Found more than one class "kcca" in cache; using the first, from namespace 'flexclust'
## Also defined by 'kernlab'
# DBSCAN: called with an explicit namespace because fpc (attached later) masks
# dbscan::dbscan.
db <- dbscan::dbscan(df_scaled, eps = 0.5, minPts = 3)
# Mean Shift with the package defaults.
ms <- meanShift(df_scaled)
# Fuzzy C-means with k centres.
fcm <- cmeans(df_scaled, centers = k)
Silhouette Evaluation
# Pairwise distances between the scaled rows, shared by every method that
# labels all observations.
dist_mat <- dist(df_scaled)

# Mean silhouette width of one clustering.
#
# labels: cluster codes, one per observation in `d` (a vector or a one-column
#         matrix; dimensions and names are dropped before scoring).
# d:      a "dist" object over the same observations.
# Returns the mean silhouette width, or NA_real_ when silhouette() cannot score
# the partition. silhouette() returns a bare NA unless 2 <= k <= n - 1, and
# indexing that NA with [, 3] would raise an error.
mean_silhouette <- function(labels, d) {
  sil <- silhouette(as.vector(labels), d)
  if (!inherits(sil, "silhouette")) {
    return(NA_real_)
  }
  mean(sil[, "sil_width"])
}

# K-means
sil_km <- mean_silhouette(km$cluster, dist_mat)
# K-medians
sil_kmed <- mean_silhouette(clusters(kmed), dist_mat)
# Fuzzy
sil_fcm <- mean_silhouette(fcm$cluster, dist_mat)
# DBSCAN: points labelled 0 are excluded, and the score is computed on the
# remaining points only. drop = FALSE keeps a matrix even if one row is left.
idx <- db$cluster != 0
if (any(idx)) {
  sil_db <- mean_silhouette(
    db$cluster[idx],
    dist(df_scaled[idx, , drop = FALSE])
  )
} else {
  sil_db <- NA_real_
}
# MeanShift
sil_ms <- mean_silhouette(ms$assignment, dist_mat)
# Result table: one row per method with its mean silhouette width
# (NA where no score could be computed).
scores <- c(
  "K-means" = sil_km,
  "K-medians" = sil_kmed,
  "DBSCAN" = sil_db,
  "MeanShift" = sil_ms,
  "Fuzzy" = sil_fcm
)
hasil <- data.frame(
  Method = names(scores),
  Silhouette = unname(scores)
)
hasil
## Method Silhouette
## 1 K-means 0.5939236
## 2 K-medians 0.5721922
## 3 DBSCAN NA
## 4 MeanShift 0.9031243
## 5 Fuzzy 0.5939236
Best Method
# Row of the results table with the highest silhouette score; which.max()
# skips NA entries.
top_row <- which.max(hasil[["Silhouette"]])
best <- hasil[top_row, ]
best
## Method Silhouette
## 4 MeanShift 0.9031243
EDA
# Attach the K-means labels to the per-location summary for profiling.
df_final$cluster <- km$cluster
# Average only the metric columns within each cluster. Aggregating every
# non-Location column would also average `cluster` itself and append a
# redundant trailing `cluster` column to the result.
metric_cols <- c("cases", "deaths", "total_cases", "total_deaths")
aggregate(
  df_final[, metric_cols],
  by = list(cluster = df_final$cluster),
  FUN = mean
)
## cluster cases deaths total_cases total_deaths
## 1 1 56.96633 1.521855 23955.67 687.7004
## 2 2 1034.10249 26.243828 405127.54 12255.6685
## 3 3 216.22500 4.599798 83862.97 2071.1536
PCA Visualization
# Project the scaled data onto its principal components and draw the first two,
# colouring each point by the cluster label stored in df_final.
pca <- prcomp(df_scaled)
plot(
  x = pca$x[, 1],
  y = pca$x[, 2],
  xlab = "PC1",
  ylab = "PC2",
  col = df_final$cluster,
  pch = 19,
  main = "Cluster Visualization (PCA)"
)

Boxplot
# Spread of the per-location mean of new cases within each cluster.
# The axis labels are spelled out; they match the names the formula
# interface derives on its own.
boxplot(
  cases ~ cluster,
  data = df_final,
  main = "Cases per Cluster",
  xlab = "cluster",
  ylab = "cases"
)
