Load Library

library(readxl)
library(tidyverse)
library(cluster)
library(factoextra)
library(sf)

Persiapan Data

data_raw <- read_excel("C:/Users/ACER/OneDrive/ドキュメント/Kerja Praktik/fix data kp.xlsx")

Prapemrosesan Data

Menghitung Prevalensi stunting pertahun(dalam persen) untuk setiap kelurahan

data_prevalensi <- data_raw %>%
  mutate(
    Prev_2022 = (JMLH_BALITA_STUNTING_2022 / BALITA_YANG_DIUKUR_2022) * 100,
    Prev_2023 = (JMLH_BALITA_STUNTING_2023 / BALITA_YANG_DIUKUR_2023) * 100,
    Prev_2024 = (JMLH_BALITA_STUNTING_2024 / BALITA_YANG_DIUKUR_2024) * 100,
    Prev_2025 = (JMLH_BALITA_STUNTING_2025 / BALITA_YANG_DIUKUR_2025) * 100
  ) %>%
  select(KELURAHAN, Prev_2022, Prev_2023, Prev_2024, Prev_2025)

data_cluster <- data_prevalensi %>%
  remove_rownames() %>%
  column_to_rownames(var = "KELURAHAN")

head(data_prevalensi)
## # A tibble: 6 × 5
##   KELURAHAN    Prev_2022 Prev_2023 Prev_2024 Prev_2025
##   <chr>            <dbl>     <dbl>     <dbl>     <dbl>
## 1 Blotongan         5.32      3.91      5.27      6.72
## 2 Sidorejo Lor      6.47      7.36      9.65      9.62
## 3 Salatiga         10.1       4.45      6.54      5.57
## 4 Bugel            10.5       5.75      5.68      6.59
## 5 Kauman Kidul      7.05      5.94      9.05      8.59
## 6 Pulutan           4.83      4.98      9.02     12.4

Standarisasi Data (Z-score)

data_scaled <- scale(data_cluster)

Penentuan Jumlah Klaster Optimal (Metode Elbow)

fviz_nbclust(data_scaled, kmeans, method = "wss") +
  labs(subtitle = "Metode Elbow")

Proses Klasterisasi K-Means

set.seed(123)
final_kmeans <- kmeans(data_scaled, centers = 3, nstart = 25)
print(final_kmeans)
## K-means clustering with 3 clusters of sizes 9, 7, 7
## 
## Cluster means:
##     Prev_2022  Prev_2023  Prev_2024   Prev_2025
## 1 -0.88450747 -0.6556080 -0.9616061 -0.84289100
## 2  0.09165707  1.0670029  0.9923635  1.17656824
## 3  1.04556683 -0.2240783  0.2439872 -0.09285125
## 
## Clustering vector:
##          Blotongan       Sidorejo Lor           Salatiga              Bugel 
##                  1                  2                  3                  3 
##       Kauman Kidul            Pulutan Kutowinangun Kidul          Gendongan 
##                  2                  2                  1                  3 
##     Sidorejo Kidul         Kalibening        Tingkir Lor     Tingkir Tengah 
##                  2                  2                  3                  3 
##   Kutowinangun Lor           Noborejo              Ledok          Tegalrejo 
##                  3                  1                  1                  1 
##         Kumpulrejo          Randuacir           Cebongan          Kecandran 
##                  1                  1                  1                  2 
##              Dukuh         Mangunsari         Kalicacing 
##                  1                  2                  3 
## 
## Within cluster sum of squares by cluster:
## [1]  8.179086  7.981300 13.120706
##  (between_SS / total_SS =  66.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Interpretasi dan profilling klaster

hasil_akhir <- data_prevalensi  %>%
  mutate(Cluster = final_kmeans$cluster) %>%
  mutate(Cluster = case_when(
    Cluster == 1 ~ 1,
    Cluster == 3 ~ 2,
    Cluster == 2 ~ 3
  ))

profiling <- hasil_akhir %>%
  group_by(Cluster) %>%
  summarise(across(starts_with("Prev"), ~mean(.x, na.rm = TRUE)))

knitr::kable(profiling, digits = 2,
             caption = "Rata-rata Prevalensi Stunting per Klaster (%)")
Rata-rata Prevalensi Stunting per Klaster (%)
Cluster Prev_2022 Prev_2023 Prev_2024 Prev_2025
1 3.84 2.99 4.37 4.54
2 10.78 3.82 7.48 6.65
3 7.35 6.29 9.42 10.22

Visualisasi Klaster

fviz_cluster(final_kmeans, data = data_scaled,
             palette = c("#7F7F7F", "#4D4D4D", "#1A1A1A"),
             ellipse.type = "convex",
             ggtheme = theme_minimal(),
             main = "Hasil Pengelompokan Kelurahan (K-Means)")

Visualisasi spasial

peta_salatiga <- st_read("C:/Users/ACER/Downloads/KOTA SALATIGA/ADMINISTRASIDESA_AR_25K.shp")
## Reading layer `ADMINISTRASIDESA_AR_25K' from data source 
##   `C:\Users\ACER\Downloads\KOTA SALATIGA\ADMINISTRASIDESA_AR_25K.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 42 features and 27 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 110.4663 ymin: -7.391281 xmax: 110.5347 ymax: -7.283891
## Geodetic CRS:  WGS 84
peta_final <- peta_salatiga %>%
  mutate(NAMOBJ_CAPS = trimws(toupper(NAMOBJ))) %>%
  inner_join(
    hasil_akhir %>% mutate(Kel_CAPS = trimws(toupper(KELURAHAN))),
    by = c("NAMOBJ_CAPS" = "Kel_CAPS")
  )

ggplot(data = peta_final) +
  geom_sf(aes(fill = as.factor(Cluster)), color = "white", linewidth = 0.4) +
  scale_fill_manual(
    values = c("1" = "green", "2" = "yellow", "3" = "red"),
    labels = c("1" = "Rendah", "2" = "Sedang", "3" = "Tinggi"),
    name = "Tingkat Kerawanan"
  ) +
  theme_minimal() +
  labs(title = "Peta Sebaran Klaster Stunting Kota Salatiga")