# Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
# Build the analysis data frame by hand: one row per province (38 rows).
# Columns actually present: Provinsi, APBD_2023, IPM_2023, APS_16_18, RASIO.
# Decimal values are written with dots (presumably the source table used
# decimal commas -- TODO confirm against the original data).
# NOTE(review): column meanings and units are not defined in this script --
# presumably APBD = provincial budget, IPM = human development index,
# APS_16_18 = school participation rate for ages 16-18; the definition of
# RASIO should be confirmed.
# NOTE(review): PAPUA, PAPUA TENGAH, PAPUA SELATAN and PAPUA PEGUNUNGAN carry
# identical IPM/APS/RASIO values, as do PAPUA BARAT and PAPUA BARAT DAYA --
# presumably the newer provinces reuse their parent province's figures; verify.
data <- data.frame(
Provinsi = c("DKI JAKARTA", "JAWA BARAT", "JAWA TENGAH", "DI YOGYAKARTA", "JAWA TIMUR", "ACEH",
"SUMATERA UTARA", "SUMATERA BARAT", "RIAU", "JAMBI", "SUMATERA SELATAN", "LAMPUNG",
"KALIMANTAN BARAT", "KALIMANTAN TENGAH", "KALIMANTAN SELATAN", "KALIMANTAN TIMUR",
"SULAWESI UTARA", "SULAWESI TENGAH", "SULAWESI SELATAN", "SULAWESI TENGGARA",
"MALUKU", "BALI", "NUSA TENGGARA BARAT", "NUSA TENGGARA TIMUR", "PAPUA",
"BENGKULU", "MALUKU UTARA", "BANTEN", "K. BANGKA BELITUNG", "GORONTALO",
"K. RIAU", "PAPUA BARAT", "SULAWESI BARAT", "KALIMANTAN UTARA",
"PAPUA TENGAH", "PAPUA SELATAN", "PAPUA PEGUNUNGAN", "PAPUA BARAT DAYA"),
APBD_2023 = c(76613.8, 33931.5, 26763.2, 6000.1, 31120.7, 11093.9, 14273.5, 6789.3, 10142.5,
5501.7, 10511.8, 7381.8, 6281.0, 6784.3, 7727.8, 17031.2, 3493.3, 5182.9, 9995.6,
4192.2, 2980.6, 7522.4, 5992.0, 5111.5, 3492.8, 2977.4, 4242.7, 11774.7, 2962.3,
1854.1, 4151.6, 5505.6, 2081.8, 2997.4, 2346.9, 1618.4, 1944.4, 2826.0),
IPM_2023 = c(83.55, 74.24, 73.39, 81.09, 74.65, 74.70, 75.13, 75.64, 74.95, 73.73, 73.18, 72.48,
70.47, 73.73, 74.66, 78.20, 75.04, 71.66, 74.60, 72.94, 72.75, 78.01, 72.37, 70.47,
63.01, 74.30, 70.98, 75.77, 74.09, 71.25, 79.08, 67.47, 69.80, 72.88, 63.01, 63.01,
63.01, 67.47),
APS_16_18 = c(98.17, 95.75, 97.08, 98.88, 97.64, 97.72, 96.76, 96.79, 95.89, 96.01, 95.27, 95.93,
92.92, 95.21, 94.12, 98.71, 95.00, 93.13, 93.22, 95.00, 97.97, 97.95, 97.95, 94.89,
80.91, 97.91, 97.51, 96.65, 93.20, 91.85, 99.07, 97.42, 89.47, 96.96, 80.91, 80.91,
80.91, 97.42),
RASIO = c(19, 69, 71, 92, 75, 84, 80, 85, 79, 73, 72, 15, 70, 67, 70, 82, 75, 77, 72, 75, 80, 85, 78, 76,
65, 80, 79, 70, 70, 72, 85, 81, 72, 78, 65, 65, 65, 81)
)
# Drop any row that contains an NA.
# NOTE(review): the vectors above contain no NA and there is no APS_19_24
# column, so this call currently removes nothing; it only acts as a safeguard
# if missing values are introduced later.
data <- na.omit(data)
# Print the assembled data frame for a visual check.
data
## Provinsi APBD_2023 IPM_2023 APS_16_18 RASIO
## 1 DKI JAKARTA 76613.8 83.55 98.17 19
## 2 JAWA BARAT 33931.5 74.24 95.75 69
## 3 JAWA TENGAH 26763.2 73.39 97.08 71
## 4 DI YOGYAKARTA 6000.1 81.09 98.88 92
## 5 JAWA TIMUR 31120.7 74.65 97.64 75
## 6 ACEH 11093.9 74.70 97.72 84
## 7 SUMATERA UTARA 14273.5 75.13 96.76 80
## 8 SUMATERA BARAT 6789.3 75.64 96.79 85
## 9 RIAU 10142.5 74.95 95.89 79
## 10 JAMBI 5501.7 73.73 96.01 73
## 11 SUMATERA SELATAN 10511.8 73.18 95.27 72
## 12 LAMPUNG 7381.8 72.48 95.93 15
## 13 KALIMANTAN BARAT 6281.0 70.47 92.92 70
## 14 KALIMANTAN TENGAH 6784.3 73.73 95.21 67
## 15 KALIMANTAN SELATAN 7727.8 74.66 94.12 70
## 16 KALIMANTAN TIMUR 17031.2 78.20 98.71 82
## 17 SULAWESI UTARA 3493.3 75.04 95.00 75
## 18 SULAWESI TENGAH 5182.9 71.66 93.13 77
## 19 SULAWESI SELATAN 9995.6 74.60 93.22 72
## 20 SULAWESI TENGGARA 4192.2 72.94 95.00 75
## 21 MALUKU 2980.6 72.75 97.97 80
## 22 BALI 7522.4 78.01 97.95 85
## 23 NUSA TENGGARA BARAT 5992.0 72.37 97.95 78
## 24 NUSA TENGGARA TIMUR 5111.5 70.47 94.89 76
## 25 PAPUA 3492.8 63.01 80.91 65
## 26 BENGKULU 2977.4 74.30 97.91 80
## 27 MALUKU UTARA 4242.7 70.98 97.51 79
## 28 BANTEN 11774.7 75.77 96.65 70
## 29 K. BANGKA BELITUNG 2962.3 74.09 93.20 70
## 30 GORONTALO 1854.1 71.25 91.85 72
## 31 K. RIAU 4151.6 79.08 99.07 85
## 32 PAPUA BARAT 5505.6 67.47 97.42 81
## 33 SULAWESI BARAT 2081.8 69.80 89.47 72
## 34 KALIMANTAN UTARA 2997.4 72.88 96.96 78
## 35 PAPUA TENGAH 2346.9 63.01 80.91 65
## 36 PAPUA SELATAN 1618.4 63.01 80.91 65
## 37 PAPUA PEGUNUNGAN 1944.4 63.01 80.91 65
## 38 PAPUA BARAT DAYA 2826.0 67.47 97.42 81
# Move the province names into the row labels so they follow every
# observation through scaling, clustering and plotting.
rownames(data) <- data[["Provinsi"]]
# Keep only the numeric indicators; the label column is now redundant.
data <- data[, names(data) != "Provinsi"]
# Standardise each indicator to mean 0 / sd 1 so that no variable dominates
# the Euclidean distances used by k-means purely because of its units.
data_scaled <- scale(data, center = TRUE, scale = TRUE)
print(data_scaled)
## APBD_2023 IPM_2023 APS_16_18 RASIO
## DKI JAKARTA 4.94965851 2.30795501 0.72260133 -3.61759601
## JAWA BARAT 1.78670480 0.30739559 0.25265495 -0.22665747
## JAWA TENGAH 1.25550097 0.12474516 0.51093127 -0.09101993
## DI YOGYAKARTA -0.28313974 1.77934318 0.86047817 1.33317426
## JAWA TIMUR 1.57841166 0.39549756 0.61967919 0.18025515
## ACEH 0.09433415 0.40624170 0.63521461 0.79062409
## SUMATERA UTARA 0.32995706 0.49864133 0.44878960 0.51934901
## SUMATERA BARAT -0.22465641 0.60823159 0.45461538 0.85844286
## RIAU 0.02383106 0.45996242 0.27984193 0.45153024
## JAMBI -0.32007346 0.19780533 0.30314506 0.04461761
## SUMATERA SELATAN 0.05119788 0.07961975 0.15944244 -0.02320116
## LAMPUNG -0.18074944 -0.07079825 0.28760964 -3.88887109
## KALIMANTAN BARAT -0.26232376 -0.50271280 -0.29691045 -0.15883870
## KALIMANTAN TENGAH -0.22502693 0.19780533 0.14779088 -0.36229501
## KALIMANTAN SELATAN -0.15510926 0.39764639 -0.06387918 -0.15883870
## KALIMANTAN TIMUR 0.53431524 1.15833171 0.82746540 0.65498655
## SULAWESI UTARA -0.46890509 0.47930187 0.10701041 0.18025515
## SULAWESI TENGAH -0.34369800 -0.24700219 -0.25612998 0.31589270
## SULAWESI SELATAN 0.01294509 0.38475342 -0.23865263 -0.02320116
## SULAWESI TENGGARA -0.41711340 0.02804787 0.10701041 0.18025515
## MALUKU -0.50689850 -0.01277988 0.68376279 0.51934901
## BALI -0.17033034 1.11750397 0.67987894 0.85844286
## NUSA TENGGARA BARAT -0.28373998 -0.09443536 0.67987894 0.38371147
## NUSA TENGGARA TIMUR -0.34898906 -0.50271280 0.08564921 0.24807392
## PAPUA -0.46894214 -2.10573893 -2.62916502 -0.49793255
## BENGKULU -0.50713564 0.32028856 0.67211123 0.51934901
## MALUKU UTARA -0.41337112 -0.39312254 0.59443414 0.45153024
## BANTEN 0.14478454 0.63616636 0.42742840 -0.15883870
## K. BANGKA BELITUNG -0.50825462 0.27516316 -0.24253649 -0.15883870
## GORONTALO -0.59037731 -0.33510417 -0.50469666 -0.02320116
## K. RIAU -0.42012204 1.34742863 0.89737478 0.85844286
## PAPUA BARAT -0.31978445 -1.14736138 0.57695679 0.58716778
## SULAWESI BARAT -0.57350369 -0.64668431 -0.96687533 -0.02320116
## KALIMANTAN UTARA -0.50565354 0.01515490 0.48762814 0.38371147
## PAPUA TENGAH -0.55385857 -2.10573893 -2.62916502 -0.49793255
## PAPUA SELATAN -0.60784375 -2.10573893 -2.62916502 -0.49793255
## PAPUA PEGUNUNGAN -0.58368566 -2.10573893 -2.62916502 -0.49793255
## PAPUA BARAT DAYA -0.51835507 -1.14736138 0.57695679 0.58716778
## attr(,"scaled:center")
## APBD_2023 IPM_2023 APS_16_18 RASIO
## 9820.91316 72.80947 94.44895 72.34211
## attr(,"scaled:scale")
## APBD_2023 IPM_2023 APS_16_18 RASIO
## 13494.443438 4.653698 5.149524 14.745180
# Choose the number of clusters.
# k-means starts from random centroids, so each diagnostic is seeded and uses
# several random starts per k (nstart is forwarded to kmeans() through `...`).
# Without this the curves can change from run to run and a single unlucky
# start can produce a misleading bump in the plot.

# Elbow method: total within-cluster sum of squares for each candidate k.
set.seed(123)
fviz_nbclust(data_scaled, kmeans, method = "wss", nstart = 25) +
  labs(subtitle = "Elbow method")

# Silhouette method: average silhouette width for each candidate k.
set.seed(123)
fviz_nbclust(data_scaled, kmeans, method = "silhouette", nstart = 25) +
  labs(subtitle = "Silhouette method")

# Fit the final model with k = 3 (picked from the diagnostic plots; change
# `centers` if the plots suggest a different k).
# The seed pins the random initial centroids; 25 restarts keep the best
# solution found across them.
set.seed(123)
kmeans_result <- kmeans(x = data_scaled, centers = 3, nstart = 25)
# Show cluster sizes, centres (in scaled units), assignments and fit summary.
print(kmeans_result)
## K-means clustering with 3 clusters of sizes 2, 4, 32
##
## Cluster means:
## APBD_2023 IPM_2023 APS_16_18 RASIO
## 1 2.38445453 1.1185784 0.5051055 -3.7532335
## 2 -0.55358253 -2.1057389 -2.6291650 -0.4979326
## 3 -0.07983059 0.1933062 0.2970765 0.2968187
##
## Clustering vector:
## DKI JAKARTA JAWA BARAT JAWA TENGAH DI YOGYAKARTA
## 1 3 3 3
## JAWA TIMUR ACEH SUMATERA UTARA SUMATERA BARAT
## 3 3 3 3
## RIAU JAMBI SUMATERA SELATAN LAMPUNG
## 3 3 3 1
## KALIMANTAN BARAT KALIMANTAN TENGAH KALIMANTAN SELATAN KALIMANTAN TIMUR
## 3 3 3 3
## SULAWESI UTARA SULAWESI TENGAH SULAWESI SELATAN SULAWESI TENGGARA
## 3 3 3 3
## MALUKU BALI NUSA TENGGARA BARAT NUSA TENGGARA TIMUR
## 3 3 3 3
## PAPUA BENGKULU MALUKU UTARA BANTEN
## 2 3 3 3
## K. BANGKA BELITUNG GORONTALO K. RIAU PAPUA BARAT
## 3 3 3 3
## SULAWESI BARAT KALIMANTAN UTARA PAPUA TENGAH PAPUA SELATAN
## 3 3 2 2
## PAPUA PEGUNUNGAN PAPUA BARAT DAYA
## 2 3
##
## Within cluster sum of squares by cluster:
## [1] 16.12118042 0.01101455 34.66312113
## (between_SS / total_SS = 65.7 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# Plot the fitted clusters for the scaled observations.
fviz_cluster(
  object = kmeans_result,
  data = data_scaled,
  main = "K-Means Clustering Results",
  ggtheme = theme_minimal(),
  palette = "jco"
)

# Silhouette diagnostics for the reported k-means solution.
# Pairwise Euclidean distances between the standardised observations.
dk <- dist(data_scaled, method = "euclidean")
# Reuse the seeded, multi-start fit instead of running a second single-start
# kmeans(): a fresh fit can land on a different partition or a different
# label numbering, so the silhouette would no longer describe the clusters
# that are printed, plotted and written to data$cluster.
# `fitk` is kept as an alias so any later code that refers to it still works.
fitk <- kmeans_result
# Silhouette width of every observation under that partition.
sik <- silhouette(fitk$cluster, dk)
plot(sik)

# Attach each province's cluster label to the unscaled data; the rows of
# `data` are in the same order as the rows that were clustered.
data[["cluster"]] <- kmeans_result[["cluster"]]
# Show the original indicators alongside the assigned cluster.
print(data)
## APBD_2023 IPM_2023 APS_16_18 RASIO cluster
## DKI JAKARTA 76613.8 83.55 98.17 19 1
## JAWA BARAT 33931.5 74.24 95.75 69 3
## JAWA TENGAH 26763.2 73.39 97.08 71 3
## DI YOGYAKARTA 6000.1 81.09 98.88 92 3
## JAWA TIMUR 31120.7 74.65 97.64 75 3
## ACEH 11093.9 74.70 97.72 84 3
## SUMATERA UTARA 14273.5 75.13 96.76 80 3
## SUMATERA BARAT 6789.3 75.64 96.79 85 3
## RIAU 10142.5 74.95 95.89 79 3
## JAMBI 5501.7 73.73 96.01 73 3
## SUMATERA SELATAN 10511.8 73.18 95.27 72 3
## LAMPUNG 7381.8 72.48 95.93 15 1
## KALIMANTAN BARAT 6281.0 70.47 92.92 70 3
## KALIMANTAN TENGAH 6784.3 73.73 95.21 67 3
## KALIMANTAN SELATAN 7727.8 74.66 94.12 70 3
## KALIMANTAN TIMUR 17031.2 78.20 98.71 82 3
## SULAWESI UTARA 3493.3 75.04 95.00 75 3
## SULAWESI TENGAH 5182.9 71.66 93.13 77 3
## SULAWESI SELATAN 9995.6 74.60 93.22 72 3
## SULAWESI TENGGARA 4192.2 72.94 95.00 75 3
## MALUKU 2980.6 72.75 97.97 80 3
## BALI 7522.4 78.01 97.95 85 3
## NUSA TENGGARA BARAT 5992.0 72.37 97.95 78 3
## NUSA TENGGARA TIMUR 5111.5 70.47 94.89 76 3
## PAPUA 3492.8 63.01 80.91 65 2
## BENGKULU 2977.4 74.30 97.91 80 3
## MALUKU UTARA 4242.7 70.98 97.51 79 3
## BANTEN 11774.7 75.77 96.65 70 3
## K. BANGKA BELITUNG 2962.3 74.09 93.20 70 3
## GORONTALO 1854.1 71.25 91.85 72 3
## K. RIAU 4151.6 79.08 99.07 85 3
## PAPUA BARAT 5505.6 67.47 97.42 81 3
## SULAWESI BARAT 2081.8 69.80 89.47 72 3
## KALIMANTAN UTARA 2997.4 72.88 96.96 78 3
## PAPUA TENGAH 2346.9 63.01 80.91 65 2
## PAPUA SELATAN 1618.4 63.01 80.91 65 2
## PAPUA PEGUNUNGAN 1944.4 63.01 80.91 65 2
## PAPUA BARAT DAYA 2826.0 67.47 97.42 81 3
# Mean of every indicator within each cluster, on the original (unscaled)
# scale. across() replaces the superseded summarise_all(); the grouping
# column is excluded from everything() automatically, and group_by() keeps
# the clusters sorted in the output.
data %>%
  group_by(cluster) %>%
  summarise(across(everything(), \(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 × 5
## cluster APBD_2023 IPM_2023 APS_16_18 RASIO
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 41998. 78.0 97.1 17
## 2 2 2351. 63.0 80.9 65
## 3 3 8744. 73.7 96.0 76.7