# Load the health-workforce table. The `Provinsi` column holds the row
# identifier; every other column is treated as a numeric measurement.
df <- read.csv("Tenaga_Kesehatan.csv", stringsAsFactors = FALSE)
stopifnot("Provinsi" %in% names(df))
# NOTE(review): several previewed values look implausibly small for head
# counts (e.g. single-digit `Perawat`) next to three-digit neighbours --
# confirm the CSV's thousands/decimal separators are parsed as intended.

# Remove the identifier by name rather than by position, and keep a data
# frame even if only one measurement column is left.
df_num <- df[, setdiff(names(df), "Provinsi"), drop = FALSE]
rownames(df_num) <- df$Provinsi

library(knitr)
kable(head(df_num, 10), caption = "Data Tenaga Kesehatan Menurut Provinsi", align = "l")
Data Tenaga Kesehatan Menurut Provinsi
Perawat Bidan Kefarmasian Tenaga.Kesehatan.Masyarakat Tenaga.Kesehatan.Lingkungan Tenaga.Gizi Tenaga.Medis Tenaga.Kesehatan.Psikologi.Klinis Tenaga.Keterapian.Fisik Tenaga.Keteknisan.Medis Tenaga.Teknik.Biomedika Tenaga.Kesehatan.Tradisional
Aceh 21 21 4 4 1 1 6 77 484 2 2 NA
Sumatera Utara 24 25 5 4 986 2 12 45 516 2 4 9
Sumatera Barat 12 9 3 2 641 1 5 31 380 2 2 2
Riau 13 10 4 2 487 770 6 60 466 1 2 2
Jambi 8 7 2 1 474 521 3 30 218 857 1 9
Sumatera Selatan 17 15 4 3 1 1 6 49 418 2 3 NA
Bengkulu 5 5 1 1 330 578 1 16 86 367 931 NA
Lampung 13 11 3 1 758 806 4 25 327 1 2 56
Kepulauan Bangka Belitung 4 2 1 479 156 247 1 20 114 402 585 NA
Kepulauan Riau 5 3 2 509 342 273 3 25 140 549 805 2
# Print the structure of the measurement table (dimensions and column types).
str(df_num)
## 'data.frame':    42 obs. of  12 variables:
##  $ Perawat                          : int  21 24 12 13 8 17 5 13 4 5 ...
##  $ Bidan                            : int  21 25 9 10 7 15 5 11 2 3 ...
##  $ Kefarmasian                      : int  4 5 3 4 2 4 1 3 1 2 ...
##  $ Tenaga.Kesehatan.Masyarakat      : int  4 4 2 2 1 3 1 1 479 509 ...
##  $ Tenaga.Kesehatan.Lingkungan      : int  1 986 641 487 474 1 330 758 156 342 ...
##  $ Tenaga.Gizi                      : int  1 2 1 770 521 1 578 806 247 273 ...
##  $ Tenaga.Medis                     : int  6 12 5 6 3 6 1 4 1 3 ...
##  $ Tenaga.Kesehatan.Psikologi.Klinis: int  77 45 31 60 30 49 16 25 20 25 ...
##  $ Tenaga.Keterapian.Fisik          : int  484 516 380 466 218 418 86 327 114 140 ...
##  $ Tenaga.Keteknisan.Medis          : int  2 2 2 1 857 2 367 1 402 549 ...
##  $ Tenaga.Teknik.Biomedika          : int  2 4 2 2 1 3 931 2 585 805 ...
##  $ Tenaga.Kesehatan.Tradisional     : int  NA 9 2 2 9 NA NA 56 NA 2 ...
# Print per-column summary statistics, including the number of NAs per column.
summary(df_num)
##     Perawat           Bidan         Kefarmasian    Tenaga.Kesehatan.Masyarakat
##  Min.   :  1.00   Min.   :  1.00   Min.   :  1.0   Min.   :  1.0              
##  1st Qu.:  4.00   1st Qu.:  3.50   1st Qu.:  3.0   1st Qu.:  1.0              
##  Median : 10.00   Median :  7.00   Median :  5.0   Median :  4.0              
##  Mean   : 29.82   Mean   : 58.21   Mean   :178.9   Mean   :157.7              
##  3rd Qu.: 16.50   3rd Qu.: 13.50   3rd Qu.:237.0   3rd Qu.:327.5              
##  Max.   :583.00   Max.   :992.00   Max.   :963.0   Max.   :947.0              
##  NA's   :3        NA's   :3        NA's   :3       NA's   :3                  
##  Tenaga.Kesehatan.Lingkungan  Tenaga.Gizi     Tenaga.Medis  
##  Min.   :  1.0               Min.   :  1.0   Min.   :  1.0  
##  1st Qu.: 90.0               1st Qu.:  2.5   1st Qu.:  3.0  
##  Median :322.0               Median :238.0   Median :  6.0  
##  Mean   :349.3               Mean   :313.9   Mean   :161.4  
##  3rd Qu.:589.0               3rd Qu.:581.5   3rd Qu.:220.5  
##  Max.   :986.0               Max.   :853.0   Max.   :928.0  
##  NA's   :3                   NA's   :3       NA's   :3      
##  Tenaga.Kesehatan.Psikologi.Klinis Tenaga.Keterapian.Fisik
##  Min.   :  1.00                    Min.   :  2.0          
##  1st Qu.:  5.00                    1st Qu.: 32.5          
##  Median : 25.00                    Median :135.0          
##  Mean   : 53.36                    Mean   :203.6          
##  3rd Qu.: 51.00                    3rd Qu.:353.5          
##  Max.   :333.00                    Max.   :766.0          
##  NA's   :3                         NA's   :3              
##  Tenaga.Keteknisan.Medis Tenaga.Teknik.Biomedika Tenaga.Kesehatan.Tradisional
##  Min.   :  1.0           Min.   :  1.0           Min.   :  1.0               
##  1st Qu.:  2.0           1st Qu.:  2.0           1st Qu.:  2.0               
##  Median : 20.0           Median :  8.0           Median :  5.5               
##  Mean   :172.9           Mean   :214.7           Mean   : 52.0               
##  3rd Qu.:242.0           3rd Qu.:355.0           3rd Qu.: 35.5               
##  Max.   :950.0           Max.   :931.0           Max.   :624.0               
##  NA's   :3               NA's   :3               NA's   :18
# Keep complete rows only. The summary output reports 18 NAs in
# `Tenaga.Kesehatan.Tradisional`, so at least 18 of the 42 rows are dropped.
df_num <- na.omit(df_num)

# Drop constant columns: scale() would divide by a zero standard deviation
# and fill them with NaN. `isTRUE()` also discards columns whose standard
# deviation is NA, and `drop = FALSE` keeps a data frame when a single
# column survives.
has_spread <- vapply(
  df_num,
  function(x) isTRUE(sd(x, na.rm = TRUE) > 0),
  logical(1)
)
df_num <- df_num[, has_spread, drop = FALSE]

# Centre each column to mean 0 and scale to unit standard deviation.
df_scaled <- scale(df_num)

# Count missing, not-a-number and infinite entries in the scaled matrix.
# The rendered output below shows 0 for each check.
sum(is.na(df_scaled))
## [1] 0
sum(is.nan(df_scaled))
## [1] 0
sum(is.infinite(df_scaled))
## [1] 0
# Elbow method: total within-cluster sum of squares of K-means for
# K = 1..10, each fit using 20 random starts.
set.seed(123)
wss <- vapply(
  seq_len(10),
  function(n_clusters) {
    fit <- kmeans(df_scaled, centers = n_clusters, nstart = 20)
    fit$tot.withinss
  },
  numeric(1)
)

plot(
  x = seq_len(10),
  y = wss,
  type = "b",
  pch = 19,
  xlab = "Jumlah Cluster (K)",
  ylab = "Total Within Sum of Squares",
  main = "Elbow Method"
)

library(cluster)

# Average silhouette width of a K-means partition of `df_scaled`.
#
# k: number of clusters (the script calls this with k >= 2).
# d: pairwise distances between the rows of `df_scaled`; defaults to
#    Euclidean distances computed on the fly, so `avg_sil(k)` still works.
# Returns a single numeric value: the mean of the per-observation
# silhouette widths.
avg_sil <- function(k, d = dist(df_scaled)) {
  km <- kmeans(df_scaled, centers = k, nstart = 25)
  ss <- silhouette(km$cluster, d)
  mean(ss[, "sil_width"])
}

# The distance matrix does not depend on k, so compute it once.
sil_dist <- dist(df_scaled)

# Seed the random K-means starts so the silhouette curve is reproducible.
set.seed(123)
k_values <- 2:10
sil_values <- vapply(k_values, avg_sil, numeric(1), d = sil_dist)

plot(k_values, sil_values, type = "b", pch = 19,
     xlab = "Jumlah Cluster (K)",
     ylab = "Average Silhouette",
     main = "Silhouette Analysis")

# Final K-means fit with three clusters and 25 random starts; the seed is
# fixed immediately before the fit.
set.seed(123)
km_res <- kmeans(
  x = df_scaled,
  centers = 3,
  nstart = 25
)

library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Draw the K-means partition with factoextra.
fviz_cluster(
  object = km_res,
  data = df_scaled,
  main = "K-means Clustering"
)

library(cluster)
# K-medoids (PAM) with three clusters on the scaled data.
kmed_res <- pam(x = df_scaled, k = 3)

# Visualise the K-medoids partition.
fviz_cluster(
  object = kmed_res,
  data = df_scaled,
  main = "K-medoids Clustering"
)

library(dbscan)
## 
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
# DBSCAN parameters, defined once so the k-NN plot, the reference line and
# the fit cannot drift apart.
db_min_pts <- 5
db_eps <- 0.7

# minPts counts the point itself, so the matching k-NN distance plot uses
# k = minPts - 1 neighbours.
# NOTE(review): eps = 0.7 was originally read off the k = 5 curve; re-check
# it against the knee of this plot.
kNNdistplot(df_scaled, k = db_min_pts - 1)
abline(h = db_eps, col = "red", lty = 2)

# Call dbscan::dbscan explicitly: fpc (attached later in this script) also
# exports a `dbscan()` and masks this one, which would break a re-run.
db_res <- dbscan::dbscan(df_scaled, eps = db_eps, minPts = db_min_pts)

# Cluster id 0 means noise. If nothing else was assigned, eps is too small
# for this data and the result carries no grouping information.
if (all(db_res$cluster == 0)) {
  warning(
    "DBSCAN labelled every observation as noise; eps = ", db_eps,
    " is probably too small for this data.",
    call. = FALSE
  )
}

fviz_cluster(list(data = df_scaled, cluster = db_res$cluster),
             geom = "point",
             main = "DBSCAN Clustering")

# Agglomerative clustering on the distances between scaled rows, using
# the "ward.D2" linkage.
dist_matrix <- dist(x = df_scaled)
hc <- hclust(d = dist_matrix, method = "ward.D2")

# Cut the tree into three groups.
hc_clusters <- cutree(tree = hc, k = 3)

# Dendrogram with the three-cluster cut outlined in red.
plot(
  x = hc,
  main = "Hierarchical Clustering Dendrogram",
  xlab = "",
  sub = ""
)
rect.hclust(tree = hc, k = 3, border = "red")
library(e1071)
## 
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
## 
##     element
# Fuzzy c-means with three clusters and fuzzifier m = 2. When `centers` is
# a number the initial centres are drawn at random, so fix the seed to make
# the memberships reproducible, as is done for K-means.
set.seed(123)
fcm_res <- cmeans(df_scaled, centers = 3, iter.max = 100, m = 2)

# Plot the hard assignment (`$cluster`) of the fuzzy partition.
fviz_cluster(list(data = df_scaled, cluster = fcm_res$cluster),
             main = "Fuzzy C-means Clustering")

# Collect the cluster labels of all five methods next to the cleaned
# (unscaled) measurements, one label column per method.
df_result <- df_num
df_result$KMeans <- km_res$cluster
df_result$KMedoids <- kmed_res$clustering
# DBSCAN uses 0 for noise points (see the "DBSCAN (0 = Noise)" panel title
# used later in this script).
df_result$DBSCAN <- db_res$cluster
df_result$Hierarchical <- hc_clusters
df_result$FuzzyCMeans <- fcm_res$cluster

# Show the first ten rows of the combined table.
kable(head(df_result, 10), caption = "Hasil Clustering")
Hasil Clustering
Perawat Bidan Kefarmasian Tenaga.Kesehatan.Masyarakat Tenaga.Kesehatan.Lingkungan Tenaga.Gizi Tenaga.Medis Tenaga.Kesehatan.Psikologi.Klinis Tenaga.Keterapian.Fisik Tenaga.Keteknisan.Medis Tenaga.Teknik.Biomedika Tenaga.Kesehatan.Tradisional KMeans KMedoids DBSCAN Hierarchical FuzzyCMeans
Sumatera Utara 24 25 5 4 986 2 12 45 516 2 4 9 3 1 0 1 3
Sumatera Barat 12 9 3 2 641 1 5 31 380 2 2 2 3 1 0 1 3
Riau 13 10 4 2 487 770 6 60 466 1 2 2 3 2 0 1 2
Jambi 8 7 2 1 474 521 3 30 218 857 1 9 3 2 0 1 2
Lampung 13 11 3 1 758 806 4 25 327 1 2 56 3 2 0 1 2
Kepulauan Riau 5 3 2 509 342 273 3 25 140 549 805 2 3 1 0 1 3
DKI Jakarta 39 7 13 1 743 1 28 333 2 5 8 166 1 3 0 2 1
Jawa Barat 74 33 26 4 2 3 33 263 2 9 12 40 1 3 0 2 1
Jawa Tengah 66 30 25 4 2 3 21 240 3 8 10 34 1 3 0 2 1
DI Yogyakarta 12 3 5 490 398 634 6 178 599 2 2 4 3 2 0 1 2
# Mean of every measurement column within each K-means cluster, rendered
# with two decimal places.
cluster_summary <- aggregate(
  x = df_num,
  by = list(Cluster = km_res$cluster),
  FUN = mean
)
kable(
  x = cluster_summary,
  caption = "Rata-rata Setiap Cluster (K-means)",
  digits = 2
)
Rata-rata Setiap Cluster (K-means)
Cluster Perawat Bidan Kefarmasian Tenaga.Kesehatan.Masyarakat Tenaga.Kesehatan.Lingkungan Tenaga.Gizi Tenaga.Medis Tenaga.Kesehatan.Psikologi.Klinis Tenaga.Keterapian.Fisik Tenaga.Keteknisan.Medis Tenaga.Teknik.Biomedika Tenaga.Kesehatan.Tradisional
1 63.25 25.75 21.50 3.25 187.25 2.75 27.00 266.50 2.25 7.25 10.25 106.75
2 195.33 316.00 272.67 240.00 63.00 98.67 294.33 2.00 16.00 59.00 188.67 229.67
3 13.29 8.94 3.47 60.41 516.29 433.71 5.06 45.35 361.82 211.18 102.47 7.76
library(fpc)
## 
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
## 
##     dbscan
# All validity indices below use the same pairwise distances between the
# scaled rows, so compute them once instead of once per index.
eval_dist <- dist(df_scaled)

# Mean silhouette width per method; the width column is selected by name
# rather than by position.
sil_kmeans <- silhouette(km_res$cluster, eval_dist)
cat("Silhouette Score (K-means):", mean(sil_kmeans[, "sil_width"]), "\n")
## Silhouette Score (K-means): 0.2667086
sil_kmedoids <- silhouette(kmed_res$clustering, eval_dist)
cat("Silhouette Score (K-medoids):", mean(sil_kmedoids[, "sil_width"]), "\n")
## Silhouette Score (K-medoids): 0.1296055
# Dunn index per method, taken from fpc::cluster.stats().
stats_kmeans <- cluster.stats(eval_dist, km_res$cluster)
cat("Dunn Index (K-means):", stats_kmeans$dunn, "\n")
## Dunn Index (K-means): 0.3124339
stats_kmedoids <- cluster.stats(eval_dist, kmed_res$clustering)
cat("Dunn Index (K-medoids):", stats_kmedoids$dunn, "\n")
## Dunn Index (K-medoids): 0.2735522
# Total within-cluster sum of squares reported by the K-means fit.
cat("Within-cluster SS (K-means):", km_res$tot.withinss, "\n")
## Within-cluster SS (K-means): 157.3457
# Side-by-side comparison of the five partitions.
# Calling plot() on the full scaled matrix would silently draw only its
# first two columns, so project the data onto its first two principal
# components and colour that common 2-D view by each method's labels.
pc_xy <- prcomp(df_scaled)$x[, 1:2]

panel_labels <- list(
  "K-means" = km_res$cluster,
  "K-medoids" = kmed_res$clustering,
  # Shift by one so noise (label 0) gets a visible colour.
  "DBSCAN (0 = Noise)" = db_res$cluster + 1,
  "Hierarchical" = hc_clusters,
  "Fuzzy C-means" = fcm_res$cluster
)

# Remember the previous layout so it is restored exactly, instead of being
# hard-reset to a 1 x 1 grid.
old_par <- par(mfrow = c(2, 3))
for (panel_title in names(panel_labels)) {
  plot(pc_xy, col = panel_labels[[panel_title]], main = panel_title,
       pch = 19, xlab = "PC1", ylab = "PC2")
}

par(old_par)