# Load libraries
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
# Build the input table as a literal data frame, one row per province (38 rows).
# Columns actually present: Provinsi (province name), APBD_2023, IPM_2023,
# APS_16_18 and RASIO. APS_13_15 and APS_19_24 are NOT part of this table.
# Decimal values are written with dots (presumably converted from a
# comma-decimal source table -- confirm).
# NOTE(review): RASIO is 19 for DKI JAKARTA and 15 for LAMPUNG while every
# other province lies between 65 and 92 -- verify these two against the source.
# NOTE(review): PAPUA, PAPUA TENGAH, PAPUA SELATAN and PAPUA PEGUNUNGAN carry
# identical IPM_2023 / APS_16_18 / RASIO values, as do PAPUA BARAT and
# PAPUA BARAT DAYA -- presumably the parent province's figures were reused;
# confirm.
data <- data.frame(
  Provinsi = c("DKI JAKARTA", "JAWA BARAT", "JAWA TENGAH", "DI YOGYAKARTA", "JAWA TIMUR", "ACEH", 
               "SUMATERA UTARA", "SUMATERA BARAT", "RIAU", "JAMBI", "SUMATERA SELATAN", "LAMPUNG", 
               "KALIMANTAN BARAT", "KALIMANTAN TENGAH", "KALIMANTAN SELATAN", "KALIMANTAN TIMUR", 
               "SULAWESI UTARA", "SULAWESI TENGAH", "SULAWESI SELATAN", "SULAWESI TENGGARA", 
               "MALUKU", "BALI", "NUSA TENGGARA BARAT", "NUSA TENGGARA TIMUR", "PAPUA", 
               "BENGKULU", "MALUKU UTARA", "BANTEN", "K. BANGKA BELITUNG", "GORONTALO", 
               "K. RIAU", "PAPUA BARAT", "SULAWESI BARAT", "KALIMANTAN UTARA", 
               "PAPUA TENGAH", "PAPUA SELATAN", "PAPUA PEGUNUNGAN", "PAPUA BARAT DAYA"),
  APBD_2023 = c(76613.8, 33931.5, 26763.2, 6000.1, 31120.7, 11093.9, 14273.5, 6789.3, 10142.5, 
                5501.7, 10511.8, 7381.8, 6281.0, 6784.3, 7727.8, 17031.2, 3493.3, 5182.9, 9995.6, 
                4192.2, 2980.6, 7522.4, 5992.0, 5111.5, 3492.8, 2977.4, 4242.7, 11774.7, 2962.3, 
                1854.1, 4151.6, 5505.6, 2081.8, 2997.4, 2346.9, 1618.4, 1944.4, 2826.0),
  IPM_2023 = c(83.55, 74.24, 73.39, 81.09, 74.65, 74.70, 75.13, 75.64, 74.95, 73.73, 73.18, 72.48, 
               70.47, 73.73, 74.66, 78.20, 75.04, 71.66, 74.60, 72.94, 72.75, 78.01, 72.37, 70.47, 
               63.01, 74.30, 70.98, 75.77, 74.09, 71.25, 79.08, 67.47, 69.80, 72.88, 63.01, 63.01, 
               63.01, 67.47),
  APS_16_18 = c(98.17, 95.75, 97.08, 98.88, 97.64, 97.72, 96.76, 96.79, 95.89, 96.01, 95.27, 95.93, 
                92.92, 95.21, 94.12, 98.71, 95.00, 93.13, 93.22, 95.00, 97.97, 97.95, 97.95, 94.89, 
                80.91, 97.91, 97.51, 96.65, 93.20, 91.85, 99.07, 97.42, 89.47, 96.96, 80.91, 80.91, 
                80.91, 97.42),
  RASIO = c(19, 69, 71, 92, 75, 84, 80, 85, 79, 73, 72, 15, 70, 67, 70, 82, 75, 77, 72, 75, 80, 85, 78, 76,
            65, 80, 79, 70, 70, 72, 85, 81, 72, 78, 65, 65, 65, 81)
)

# Drop any province that has a missing value in one of the indicator columns.
# The literal table is complete, so this is only a safeguard; na.omit() records
# the removed rows in the "na.action" attribute, which is used here to report
# a removal instead of silently shrinking the clustering input.
data <- na.omit(data)
dropped_rows <- attr(data, "na.action")
if (length(dropped_rows) > 0) {
  message(length(dropped_rows), " row(s) removed because of missing values")
}

data
##               Provinsi APBD_2023 IPM_2023 APS_16_18 RASIO
## 1          DKI JAKARTA   76613.8    83.55     98.17    19
## 2           JAWA BARAT   33931.5    74.24     95.75    69
## 3          JAWA TENGAH   26763.2    73.39     97.08    71
## 4        DI YOGYAKARTA    6000.1    81.09     98.88    92
## 5           JAWA TIMUR   31120.7    74.65     97.64    75
## 6                 ACEH   11093.9    74.70     97.72    84
## 7       SUMATERA UTARA   14273.5    75.13     96.76    80
## 8       SUMATERA BARAT    6789.3    75.64     96.79    85
## 9                 RIAU   10142.5    74.95     95.89    79
## 10               JAMBI    5501.7    73.73     96.01    73
## 11    SUMATERA SELATAN   10511.8    73.18     95.27    72
## 12             LAMPUNG    7381.8    72.48     95.93    15
## 13    KALIMANTAN BARAT    6281.0    70.47     92.92    70
## 14   KALIMANTAN TENGAH    6784.3    73.73     95.21    67
## 15  KALIMANTAN SELATAN    7727.8    74.66     94.12    70
## 16    KALIMANTAN TIMUR   17031.2    78.20     98.71    82
## 17      SULAWESI UTARA    3493.3    75.04     95.00    75
## 18     SULAWESI TENGAH    5182.9    71.66     93.13    77
## 19    SULAWESI SELATAN    9995.6    74.60     93.22    72
## 20   SULAWESI TENGGARA    4192.2    72.94     95.00    75
## 21              MALUKU    2980.6    72.75     97.97    80
## 22                BALI    7522.4    78.01     97.95    85
## 23 NUSA TENGGARA BARAT    5992.0    72.37     97.95    78
## 24 NUSA TENGGARA TIMUR    5111.5    70.47     94.89    76
## 25               PAPUA    3492.8    63.01     80.91    65
## 26            BENGKULU    2977.4    74.30     97.91    80
## 27        MALUKU UTARA    4242.7    70.98     97.51    79
## 28              BANTEN   11774.7    75.77     96.65    70
## 29  K. BANGKA BELITUNG    2962.3    74.09     93.20    70
## 30           GORONTALO    1854.1    71.25     91.85    72
## 31             K. RIAU    4151.6    79.08     99.07    85
## 32         PAPUA BARAT    5505.6    67.47     97.42    81
## 33      SULAWESI BARAT    2081.8    69.80     89.47    72
## 34    KALIMANTAN UTARA    2997.4    72.88     96.96    78
## 35        PAPUA TENGAH    2346.9    63.01     80.91    65
## 36       PAPUA SELATAN    1618.4    63.01     80.91    65
## 37    PAPUA PEGUNUNGAN    1944.4    63.01     80.91    65
## 38    PAPUA BARAT DAYA    2826.0    67.47     97.42    81
# Label each row with its province name, then drop the name column so that
# only the numeric indicators remain for clustering
row.names(data) <- data[["Provinsi"]]
data[["Provinsi"]] <- NULL
# Standardize every column (center, then divide by its standard deviation)
# so that no single indicator dominates the k-means distance
data_scaled <- scale(data)
print(data_scaled)
##                       APBD_2023    IPM_2023   APS_16_18       RASIO
## DKI JAKARTA          4.94965851  2.30795501  0.72260133 -3.61759601
## JAWA BARAT           1.78670480  0.30739559  0.25265495 -0.22665747
## JAWA TENGAH          1.25550097  0.12474516  0.51093127 -0.09101993
## DI YOGYAKARTA       -0.28313974  1.77934318  0.86047817  1.33317426
## JAWA TIMUR           1.57841166  0.39549756  0.61967919  0.18025515
## ACEH                 0.09433415  0.40624170  0.63521461  0.79062409
## SUMATERA UTARA       0.32995706  0.49864133  0.44878960  0.51934901
## SUMATERA BARAT      -0.22465641  0.60823159  0.45461538  0.85844286
## RIAU                 0.02383106  0.45996242  0.27984193  0.45153024
## JAMBI               -0.32007346  0.19780533  0.30314506  0.04461761
## SUMATERA SELATAN     0.05119788  0.07961975  0.15944244 -0.02320116
## LAMPUNG             -0.18074944 -0.07079825  0.28760964 -3.88887109
## KALIMANTAN BARAT    -0.26232376 -0.50271280 -0.29691045 -0.15883870
## KALIMANTAN TENGAH   -0.22502693  0.19780533  0.14779088 -0.36229501
## KALIMANTAN SELATAN  -0.15510926  0.39764639 -0.06387918 -0.15883870
## KALIMANTAN TIMUR     0.53431524  1.15833171  0.82746540  0.65498655
## SULAWESI UTARA      -0.46890509  0.47930187  0.10701041  0.18025515
## SULAWESI TENGAH     -0.34369800 -0.24700219 -0.25612998  0.31589270
## SULAWESI SELATAN     0.01294509  0.38475342 -0.23865263 -0.02320116
## SULAWESI TENGGARA   -0.41711340  0.02804787  0.10701041  0.18025515
## MALUKU              -0.50689850 -0.01277988  0.68376279  0.51934901
## BALI                -0.17033034  1.11750397  0.67987894  0.85844286
## NUSA TENGGARA BARAT -0.28373998 -0.09443536  0.67987894  0.38371147
## NUSA TENGGARA TIMUR -0.34898906 -0.50271280  0.08564921  0.24807392
## PAPUA               -0.46894214 -2.10573893 -2.62916502 -0.49793255
## BENGKULU            -0.50713564  0.32028856  0.67211123  0.51934901
## MALUKU UTARA        -0.41337112 -0.39312254  0.59443414  0.45153024
## BANTEN               0.14478454  0.63616636  0.42742840 -0.15883870
## K. BANGKA BELITUNG  -0.50825462  0.27516316 -0.24253649 -0.15883870
## GORONTALO           -0.59037731 -0.33510417 -0.50469666 -0.02320116
## K. RIAU             -0.42012204  1.34742863  0.89737478  0.85844286
## PAPUA BARAT         -0.31978445 -1.14736138  0.57695679  0.58716778
## SULAWESI BARAT      -0.57350369 -0.64668431 -0.96687533 -0.02320116
## KALIMANTAN UTARA    -0.50565354  0.01515490  0.48762814  0.38371147
## PAPUA TENGAH        -0.55385857 -2.10573893 -2.62916502 -0.49793255
## PAPUA SELATAN       -0.60784375 -2.10573893 -2.62916502 -0.49793255
## PAPUA PEGUNUNGAN    -0.58368566 -2.10573893 -2.62916502 -0.49793255
## PAPUA BARAT DAYA    -0.51835507 -1.14736138  0.57695679  0.58716778
## attr(,"scaled:center")
##  APBD_2023   IPM_2023  APS_16_18      RASIO 
## 9820.91316   72.80947   94.44895   72.34211 
## attr(,"scaled:scale")
##    APBD_2023     IPM_2023    APS_16_18        RASIO 
## 13494.443438     4.653698     5.149524    14.745180
# Determine the optimal number of clusters using the elbow method.
# kmeans() starts from random centers, so fix the seed and use several random
# restarts (nstart is forwarded to kmeans() through `...`); without this the
# curve can change from one run to the next.
set.seed(123)
fviz_nbclust(data_scaled, kmeans, method = "wss", nstart = 25) +
  labs(subtitle = "Elbow method")

# Alternatively, use the average silhouette width (same seed and restarts)
set.seed(123)
fviz_nbclust(data_scaled, kmeans, method = "silhouette", nstart = 25) +
  labs(subtitle = "Silhouette method")

# Fit k-means with k = 3 (picked from the plots; change `centers` to try
# another k). The seed is fixed so the 25 random starts are reproducible.
set.seed(123)
kmeans_result <- kmeans(
  x = data_scaled,
  centers = 3,
  nstart = 25
)

# Show cluster sizes, centers, assignments and within-cluster sums of squares
print(kmeans_result)
## K-means clustering with 3 clusters of sizes 2, 4, 32
## 
## Cluster means:
##     APBD_2023   IPM_2023  APS_16_18      RASIO
## 1  2.38445453  1.1185784  0.5051055 -3.7532335
## 2 -0.55358253 -2.1057389 -2.6291650 -0.4979326
## 3 -0.07983059  0.1933062  0.2970765  0.2968187
## 
## Clustering vector:
##         DKI JAKARTA          JAWA BARAT         JAWA TENGAH       DI YOGYAKARTA 
##                   1                   3                   3                   3 
##          JAWA TIMUR                ACEH      SUMATERA UTARA      SUMATERA BARAT 
##                   3                   3                   3                   3 
##                RIAU               JAMBI    SUMATERA SELATAN             LAMPUNG 
##                   3                   3                   3                   1 
##    KALIMANTAN BARAT   KALIMANTAN TENGAH  KALIMANTAN SELATAN    KALIMANTAN TIMUR 
##                   3                   3                   3                   3 
##      SULAWESI UTARA     SULAWESI TENGAH    SULAWESI SELATAN   SULAWESI TENGGARA 
##                   3                   3                   3                   3 
##              MALUKU                BALI NUSA TENGGARA BARAT NUSA TENGGARA TIMUR 
##                   3                   3                   3                   3 
##               PAPUA            BENGKULU        MALUKU UTARA              BANTEN 
##                   2                   3                   3                   3 
##  K. BANGKA BELITUNG           GORONTALO             K. RIAU         PAPUA BARAT 
##                   3                   3                   3                   3 
##      SULAWESI BARAT    KALIMANTAN UTARA        PAPUA TENGAH       PAPUA SELATAN 
##                   3                   3                   2                   2 
##    PAPUA PEGUNUNGAN    PAPUA BARAT DAYA 
##                   2                   3 
## 
## Within cluster sum of squares by cluster:
## [1] 16.12118042  0.01101455 34.66312113
##  (between_SS / total_SS =  65.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# Plot the provinces coloured by their k-means cluster
fviz_cluster(
  object = kmeans_result,
  data = data_scaled,
  palette = "jco",
  ggtheme = theme_minimal(),
  main = "K-Means Clustering Results"
)

# Silhouette plot for the k-means solution fitted earlier in this script.
# Reuse kmeans_result rather than refitting: an unseeded kmeans() call with a
# single random start can converge to a different partition (or a different
# label order), so the silhouette would not describe the reported clusters.
dk <- dist(data_scaled, method = "euclidean")
fitk <- kmeans_result  # name kept for any code that still refers to fitk
sik <- silhouette(fitk$cluster, dk)
plot(sik)

# Attach each province's cluster label to the unscaled table and display it
data[["cluster"]] <- kmeans_result[["cluster"]]
print(data)
##                     APBD_2023 IPM_2023 APS_16_18 RASIO cluster
## DKI JAKARTA           76613.8    83.55     98.17    19       1
## JAWA BARAT            33931.5    74.24     95.75    69       3
## JAWA TENGAH           26763.2    73.39     97.08    71       3
## DI YOGYAKARTA          6000.1    81.09     98.88    92       3
## JAWA TIMUR            31120.7    74.65     97.64    75       3
## ACEH                  11093.9    74.70     97.72    84       3
## SUMATERA UTARA        14273.5    75.13     96.76    80       3
## SUMATERA BARAT         6789.3    75.64     96.79    85       3
## RIAU                  10142.5    74.95     95.89    79       3
## JAMBI                  5501.7    73.73     96.01    73       3
## SUMATERA SELATAN      10511.8    73.18     95.27    72       3
## LAMPUNG                7381.8    72.48     95.93    15       1
## KALIMANTAN BARAT       6281.0    70.47     92.92    70       3
## KALIMANTAN TENGAH      6784.3    73.73     95.21    67       3
## KALIMANTAN SELATAN     7727.8    74.66     94.12    70       3
## KALIMANTAN TIMUR      17031.2    78.20     98.71    82       3
## SULAWESI UTARA         3493.3    75.04     95.00    75       3
## SULAWESI TENGAH        5182.9    71.66     93.13    77       3
## SULAWESI SELATAN       9995.6    74.60     93.22    72       3
## SULAWESI TENGGARA      4192.2    72.94     95.00    75       3
## MALUKU                 2980.6    72.75     97.97    80       3
## BALI                   7522.4    78.01     97.95    85       3
## NUSA TENGGARA BARAT    5992.0    72.37     97.95    78       3
## NUSA TENGGARA TIMUR    5111.5    70.47     94.89    76       3
## PAPUA                  3492.8    63.01     80.91    65       2
## BENGKULU               2977.4    74.30     97.91    80       3
## MALUKU UTARA           4242.7    70.98     97.51    79       3
## BANTEN                11774.7    75.77     96.65    70       3
## K. BANGKA BELITUNG     2962.3    74.09     93.20    70       3
## GORONTALO              1854.1    71.25     91.85    72       3
## K. RIAU                4151.6    79.08     99.07    85       3
## PAPUA BARAT            5505.6    67.47     97.42    81       3
## SULAWESI BARAT         2081.8    69.80     89.47    72       3
## KALIMANTAN UTARA       2997.4    72.88     96.96    78       3
## PAPUA TENGAH           2346.9    63.01     80.91    65       2
## PAPUA SELATAN          1618.4    63.01     80.91    65       2
## PAPUA PEGUNUNGAN       1944.4    63.01     80.91    65       2
## PAPUA BARAT DAYA       2826.0    67.47     97.42    81       3
# Mean of every indicator within each cluster.
# across() replaces the superseded summarise_all(); everything() skips the
# grouping column, and summarise() drops the single grouping level, so the
# result is an ungrouped tibble with one row per cluster.
data %>%
  group_by(cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 × 5
##   cluster APBD_2023 IPM_2023 APS_16_18 RASIO
##     <int>     <dbl>    <dbl>     <dbl> <dbl>
## 1       1    41998.     78.0      97.1  17  
## 2       2     2351.     63.0      80.9  65  
## 3       3     8744.     73.7      96.0  76.7