# Load the health-worker table and show the first ten provinces.
library(knitr)
df <- read.csv("Tenaga_Kesehatan.csv", stringsAsFactors = FALSE)

# Drop the first column and carry the province names over as row names, so
# that df_num holds only the measurement columns.
df_num <- df[, -1]
row.names(df_num) <- df[["Provinsi"]]

kable(
  head(df_num, n = 10),
  caption = "Data Tenaga Kesehatan Menurut Provinsi",
  align = "l"
)
Data Tenaga Kesehatan Menurut Provinsi

| | Perawat | Bidan | Kefarmasian | Tenaga.Kesehatan.Masyarakat | Tenaga.Kesehatan.Lingkungan | Tenaga.Gizi | Tenaga.Medis | Tenaga.Kesehatan.Psikologi.Klinis | Tenaga.Keterapian.Fisik | Tenaga.Keteknisan.Medis | Tenaga.Teknik.Biomedika | Tenaga.Kesehatan.Tradisional |
|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|
| Aceh | 21 | 21 | 4 | 4 | 1 | 1 | 6 | 77 | 484 | 2 | 2 | NA |
| Sumatera Utara | 24 | 25 | 5 | 4 | 986 | 2 | 12 | 45 | 516 | 2 | 4 | 9 |
| Sumatera Barat | 12 | 9 | 3 | 2 | 641 | 1 | 5 | 31 | 380 | 2 | 2 | 2 |
| Riau | 13 | 10 | 4 | 2 | 487 | 770 | 6 | 60 | 466 | 1 | 2 | 2 |
| Jambi | 8 | 7 | 2 | 1 | 474 | 521 | 3 | 30 | 218 | 857 | 1 | 9 |
| Sumatera Selatan | 17 | 15 | 4 | 3 | 1 | 1 | 6 | 49 | 418 | 2 | 3 | NA |
| Bengkulu | 5 | 5 | 1 | 1 | 330 | 578 | 1 | 16 | 86 | 367 | 931 | NA |
| Lampung | 13 | 11 | 3 | 1 | 758 | 806 | 4 | 25 | 327 | 1 | 2 | 56 |
| Kepulauan Bangka Belitung | 4 | 2 | 1 | 479 | 156 | 247 | 1 | 20 | 114 | 402 | 585 | NA |
| Kepulauan Riau | 5 | 3 | 2 | 509 | 342 | 273 | 3 | 25 | 140 | 549 | 805 | 2 |
# Show the column types and leading values of the measurement table.
# NOTE(review): in the output below, values within one column jump between
# single digits and hundreds (e.g. 1 vs 986) — presumably thousands separators
# or column alignment were mangled in the CSV; confirm against the raw file.
str(df_num)
## 'data.frame': 42 obs. of 12 variables:
## $ Perawat : int 21 24 12 13 8 17 5 13 4 5 ...
## $ Bidan : int 21 25 9 10 7 15 5 11 2 3 ...
## $ Kefarmasian : int 4 5 3 4 2 4 1 3 1 2 ...
## $ Tenaga.Kesehatan.Masyarakat : int 4 4 2 2 1 3 1 1 479 509 ...
## $ Tenaga.Kesehatan.Lingkungan : int 1 986 641 487 474 1 330 758 156 342 ...
## $ Tenaga.Gizi : int 1 2 1 770 521 1 578 806 247 273 ...
## $ Tenaga.Medis : int 6 12 5 6 3 6 1 4 1 3 ...
## $ Tenaga.Kesehatan.Psikologi.Klinis: int 77 45 31 60 30 49 16 25 20 25 ...
## $ Tenaga.Keterapian.Fisik : int 484 516 380 466 218 418 86 327 114 140 ...
## $ Tenaga.Keteknisan.Medis : int 2 2 2 1 857 2 367 1 402 549 ...
## $ Tenaga.Teknik.Biomedika : int 2 4 2 2 1 3 931 2 585 805 ...
## $ Tenaga.Kesehatan.Tradisional : int NA 9 2 2 9 NA NA 56 NA 2 ...
# Per-column distribution summary (min, quartiles, mean, max) and NA counts.
# The output below reports 18 NA's for Tenaga.Kesehatan.Tradisional versus 3
# for every other column, which matters for the row-wise NA removal that
# follows.
summary(df_num)
## Perawat Bidan Kefarmasian Tenaga.Kesehatan.Masyarakat
## Min. : 1.00 Min. : 1.00 Min. : 1.0 Min. : 1.0
## 1st Qu.: 4.00 1st Qu.: 3.50 1st Qu.: 3.0 1st Qu.: 1.0
## Median : 10.00 Median : 7.00 Median : 5.0 Median : 4.0
## Mean : 29.82 Mean : 58.21 Mean :178.9 Mean :157.7
## 3rd Qu.: 16.50 3rd Qu.: 13.50 3rd Qu.:237.0 3rd Qu.:327.5
## Max. :583.00 Max. :992.00 Max. :963.0 Max. :947.0
## NA's :3 NA's :3 NA's :3 NA's :3
## Tenaga.Kesehatan.Lingkungan Tenaga.Gizi Tenaga.Medis
## Min. : 1.0 Min. : 1.0 Min. : 1.0
## 1st Qu.: 90.0 1st Qu.: 2.5 1st Qu.: 3.0
## Median :322.0 Median :238.0 Median : 6.0
## Mean :349.3 Mean :313.9 Mean :161.4
## 3rd Qu.:589.0 3rd Qu.:581.5 3rd Qu.:220.5
## Max. :986.0 Max. :853.0 Max. :928.0
## NA's :3 NA's :3 NA's :3
## Tenaga.Kesehatan.Psikologi.Klinis Tenaga.Keterapian.Fisik
## Min. : 1.00 Min. : 2.0
## 1st Qu.: 5.00 1st Qu.: 32.5
## Median : 25.00 Median :135.0
## Mean : 53.36 Mean :203.6
## 3rd Qu.: 51.00 3rd Qu.:353.5
## Max. :333.00 Max. :766.0
## NA's :3 NA's :3
## Tenaga.Keteknisan.Medis Tenaga.Teknik.Biomedika Tenaga.Kesehatan.Tradisional
## Min. : 1.0 Min. : 1.0 Min. : 1.0
## 1st Qu.: 2.0 1st Qu.: 2.0 1st Qu.: 2.0
## Median : 20.0 Median : 8.0 Median : 5.5
## Mean :172.9 Mean :214.7 Mean : 52.0
## 3rd Qu.:242.0 3rd Qu.:355.0 3rd Qu.: 35.5
## Max. :950.0 Max. :931.0 Max. :624.0
## NA's :3 NA's :3 NA's :18
# Keep complete rows only; kmeans() and the other clustering calls below
# reject NA values.
# NOTE(review): this is listwise deletion, so one sparse column removes every
# province that lacks a value in it — confirm that this loss is acceptable.
df_num <- na.omit(df_num)
stopifnot(nrow(df_num) >= 2L) # sd() is undefined for fewer than two rows

# Drop constant columns: scale() divides by the column standard deviation, so
# a zero-variance column would become NaN. vapply() guarantees a logical mask
# (sapply() gives no such guarantee), and drop = FALSE keeps a data frame even
# when only one column survives.
has_variance <- vapply(
  df_num,
  function(x) sd(x, na.rm = TRUE) != 0,
  logical(1)
)
df_num <- df_num[, has_variance, drop = FALSE]

# Standardise every column to mean 0 / sd 1 so no variable dominates the
# Euclidean distances used below.
df_scaled <- scale(df_num)

# Sanity checks: the scaled matrix must not contain NA, NaN or Inf.
sum(is.na(df_scaled))
## [1] 0
sum(is.nan(df_scaled))
## [1] 0
sum(is.infinite(df_scaled))
## [1] 0
# Elbow heuristic: total within-cluster sum of squares for K = 1..10.
set.seed(123)
elbow_k <- seq_len(10)
wss <- vapply(
  elbow_k,
  function(k) kmeans(df_scaled, centers = k, nstart = 20)$tot.withinss,
  numeric(1)
)
plot(
  elbow_k, wss,
  type = "b", pch = 19,
  xlab = "Jumlah Cluster (K)",
  ylab = "Total Within Sum of Squares",
  main = "Elbow Method"
)

library(cluster)
# Average silhouette width of a k-means partition of df_scaled into k clusters.
#
# k: number of clusters (at least 2).
# Returns a single numeric: the mean of the per-observation silhouette widths.
avg_sil <- function(k) {
  fit <- kmeans(df_scaled, centers = k, nstart = 25)
  widths <- silhouette(fit$cluster, dist(df_scaled))[, "sil_width"]
  mean(widths)
}

# Evaluate K = 2..10 and plot the average width against K.
k_values <- 2:10
sil_values <- vapply(k_values, avg_sil, numeric(1))
plot(
  k_values, sil_values,
  type = "b", pch = 19,
  xlab = "Jumlah Cluster (K)",
  ylab = "Average Silhouette",
  main = "Silhouette Analysis"
)

# Final k-means fit with three clusters; the seed fixes the random starts.
set.seed(123)
km_res <- kmeans(x = df_scaled, centers = 3, nstart = 25)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the k-means partition with factoextra.
fviz_cluster(
  object = km_res,
  data = df_scaled,
  main = "K-means Clustering"
)

library(cluster)
# K-medoids (PAM) with the same number of clusters as the k-means fit.
kmed_res <- pam(x = df_scaled, k = 3)
# Visualise the K-medoids result
fviz_cluster(
  object = kmed_res,
  data = df_scaled,
  main = "K-medoids Clustering"
)

library(dbscan)
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
##
## as.dendrogram
# DBSCAN parameters, named once so the diagnostic plot and the fit agree.
db_min_pts <- 5
db_eps <- 0.7

# k-NN distance plot used to pick eps. The dbscan documentation recommends
# k = minPts - 1 here, so k is derived from db_min_pts rather than repeated.
kNNdistplot(df_scaled, k = db_min_pts - 1)
abline(h = db_eps, col = "red", lty = 2)

# Namespace-qualified on purpose: library(fpc), attached later in this file,
# masks dbscan() with a function of the same name, so a bare call would pick
# the wrong one if this section is re-run in the same session.
db_res <- dbscan::dbscan(df_scaled, eps = db_eps, minPts = db_min_pts)

# Label 0 means noise. Flag the degenerate case where no cluster was found,
# which means eps is too small (or minPts too large) for this data.
if (all(db_res$cluster == 0L)) {
  warning(
    "DBSCAN labelled every observation as noise; ",
    "re-read the k-NN distance plot and increase eps.",
    call. = FALSE
  )
}

fviz_cluster(list(data = df_scaled, cluster = db_res$cluster),
  geom = "point",
  main = "DBSCAN Clustering"
)

# Agglomerative clustering with Ward linkage on the distances between the
# scaled observations.
dist_matrix <- dist(df_scaled)
hc <- hclust(d = dist_matrix, method = "ward.D2")

# Dendrogram with the three-cluster cut outlined in red.
plot(
  hc,
  main = "Hierarchical Clustering Dendrogram",
  xlab = "",
  sub = ""
)
rect.hclust(tree = hc, k = 3, border = "red")

# Cluster label per observation for the same three-cluster cut.
hc_clusters <- cutree(tree = hc, k = 3)
library(e1071)
##
## Attaching package: 'e1071'
## The following object is masked from 'package:ggplot2':
##
## element
# Fuzzy c-means with three clusters and fuzzifier m = 2; fcm_res$cluster
# holds the hard label per observation used for plotting.
# NOTE(review): no seed is set immediately before this call — presumably the
# result relies on the RNG state left by the earlier set.seed(123); confirm.
fcm_res <- cmeans(
  x = df_scaled,
  centers = 3,
  iter.max = 100,
  m = 2
)
fviz_cluster(
  list(data = df_scaled, cluster = fcm_res$cluster),
  main = "Fuzzy C-means Clustering"
)

# Attach the label vector of each clustering method to the unscaled data,
# one column per method, in the order listed here.
method_labels <- list(
  KMeans = km_res$cluster,
  KMedoids = kmed_res$clustering,
  DBSCAN = db_res$cluster,
  Hierarchical = hc_clusters,
  FuzzyCMeans = fcm_res$cluster
)
df_result <- df_num
for (method_name in names(method_labels)) {
  df_result[[method_name]] <- method_labels[[method_name]]
}

# Preview the first ten rows.
kable(head(df_result, n = 10), caption = "Hasil Clustering")
Hasil Clustering

| | Perawat | Bidan | Kefarmasian | Tenaga.Kesehatan.Masyarakat | Tenaga.Kesehatan.Lingkungan | Tenaga.Gizi | Tenaga.Medis | Tenaga.Kesehatan.Psikologi.Klinis | Tenaga.Keterapian.Fisik | Tenaga.Keteknisan.Medis | Tenaga.Teknik.Biomedika | Tenaga.Kesehatan.Tradisional | KMeans | KMedoids | DBSCAN | Hierarchical | FuzzyCMeans |
|:--|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
| Sumatera Utara | 24 | 25 | 5 | 4 | 986 | 2 | 12 | 45 | 516 | 2 | 4 | 9 | 3 | 1 | 0 | 1 | 3 |
| Sumatera Barat | 12 | 9 | 3 | 2 | 641 | 1 | 5 | 31 | 380 | 2 | 2 | 2 | 3 | 1 | 0 | 1 | 3 |
| Riau | 13 | 10 | 4 | 2 | 487 | 770 | 6 | 60 | 466 | 1 | 2 | 2 | 3 | 2 | 0 | 1 | 2 |
| Jambi | 8 | 7 | 2 | 1 | 474 | 521 | 3 | 30 | 218 | 857 | 1 | 9 | 3 | 2 | 0 | 1 | 2 |
| Lampung | 13 | 11 | 3 | 1 | 758 | 806 | 4 | 25 | 327 | 1 | 2 | 56 | 3 | 2 | 0 | 1 | 2 |
| Kepulauan Riau | 5 | 3 | 2 | 509 | 342 | 273 | 3 | 25 | 140 | 549 | 805 | 2 | 3 | 1 | 0 | 1 | 3 |
| DKI Jakarta | 39 | 7 | 13 | 1 | 743 | 1 | 28 | 333 | 2 | 5 | 8 | 166 | 1 | 3 | 0 | 2 | 1 |
| Jawa Barat | 74 | 33 | 26 | 4 | 2 | 3 | 33 | 263 | 2 | 9 | 12 | 40 | 1 | 3 | 0 | 2 | 1 |
| Jawa Tengah | 66 | 30 | 25 | 4 | 2 | 3 | 21 | 240 | 3 | 8 | 10 | 34 | 1 | 3 | 0 | 2 | 1 |
| DI Yogyakarta | 12 | 3 | 5 | 490 | 398 | 634 | 6 | 178 | 599 | 2 | 2 | 4 | 3 | 2 | 0 | 1 | 2 |
# Mean of every unscaled variable within each k-means cluster.
cluster_summary <- aggregate(
  x = df_num,
  by = list(Cluster = km_res$cluster),
  FUN = mean
)
kable(
  cluster_summary,
  caption = "Rata-rata Setiap Cluster (K-means)",
  digits = 2
)
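Rata-rata Setiap Cluster (K-means)

| Cluster | Perawat | Bidan | Kefarmasian | Tenaga.Kesehatan.Masyarakat | Tenaga.Kesehatan.Lingkungan | Tenaga.Gizi | Tenaga.Medis | Tenaga.Kesehatan.Psikologi.Klinis | Tenaga.Keterapian.Fisik | Tenaga.Keteknisan.Medis | Tenaga.Teknik.Biomedika | Tenaga.Kesehatan.Tradisional |
|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
| 1 | 63.25 | 25.75 | 21.50 | 3.25 | 187.25 | 2.75 | 27.00 | 266.50 | 2.25 | 7.25 | 10.25 | 106.75 |
| 2 | 195.33 | 316.00 | 272.67 | 240.00 | 63.00 | 98.67 | 294.33 | 2.00 | 16.00 | 59.00 | 188.67 | 229.67 |
| 3 | 13.29 | 8.94 | 3.47 | 60.41 | 516.29 | 433.71 | 5.06 | 45.35 | 361.82 | 211.18 | 102.47 | 7.76 |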
library(fpc)
##
## Attaching package: 'fpc'
## The following object is masked from 'package:dbscan':
##
## dbscan
# Internal validation of the k-means and k-medoids partitions. All indices
# use the same distances between scaled observations, computed once here.
eval_dist <- dist(df_scaled)

# Average silhouette width per method.
sil_kmeans <- silhouette(km_res$cluster, eval_dist)
cat("Silhouette Score (K-means):", mean(sil_kmeans[, "sil_width"]), "\n")
## Silhouette Score (K-means): 0.2667086
sil_kmedoids <- silhouette(kmed_res$clustering, eval_dist)
cat("Silhouette Score (K-medoids):", mean(sil_kmedoids[, "sil_width"]), "\n")
## Silhouette Score (K-medoids): 0.1296055

# Dunn index per method, taken from fpc's cluster.stats().
stats_kmeans <- cluster.stats(eval_dist, km_res$cluster)
cat("Dunn Index (K-means):", stats_kmeans$dunn, "\n")
## Dunn Index (K-means): 0.3124339
stats_kmedoids <- cluster.stats(eval_dist, kmed_res$clustering)
cat("Dunn Index (K-medoids):", stats_kmedoids$dunn, "\n")
## Dunn Index (K-medoids): 0.2735522

# Total within-cluster sum of squares reported by the k-means fit.
cat("Within-cluster SS (K-means):", km_res$tot.withinss, "\n")
## Within-cluster SS (K-means): 157.3457
# Side-by-side comparison of the five label sets on a 2 x 3 grid.
# plot() on a matrix uses only its first two columns as x and y, so every
# panel shows the first two scaled variables coloured by cluster label.
panel_colours <- list(
  "K-means" = km_res$cluster,
  "K-medoids" = kmed_res$clustering,
  "DBSCAN (0 = Noise)" = db_res$cluster + 1, # shift so label 0 gets colour 1
  "Hierarchical" = hc_clusters,
  "Fuzzy C-means" = fcm_res$cluster
)

par(mfrow = c(2, 3))
for (panel_title in names(panel_colours)) {
  plot(
    df_scaled,
    col = panel_colours[[panel_title]],
    main = panel_title,
    pch = 19
  )
}
par(mfrow = c(1, 1))
