Pendahuluan

Dokumen ini menyajikan analisis segmentasi wilayah berdasarkan data sosial ekonomi provinsi di Indonesia tahun 2020 dan 2022. Analisis dilakukan dengan metode clustering K-Means, Fuzzy C-Means, dan DBSCAN, dilengkapi dengan visualisasi hasil dan evaluasi cluster.

library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(pheatmap)
## Warning: package 'pheatmap' was built under R version 4.4.3
library(readr)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(kernlab)
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:psych':
## 
##     alpha
## The following object is masked from 'package:ggplot2':
## 
##     alpha
library(ppclust)
## Warning: package 'ppclust' was built under R version 4.4.3
## 
## Attaching package: 'ppclust'
## The following object is masked from 'package:psych':
## 
##     pca
library(dbscan)
## Warning: package 'dbscan' was built under R version 4.4.3
## 
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
## 
##     as.dendrogram

Deskripsi Data

Membaca Data dan Melihat Struktur Data

# Load the 2020 provincial indicators. type_convert() re-parses the character
# columns; the column specification printed below lists the columns it handled.
# NOTE(review): absolute, machine-specific path - consider a project-relative
# path so the report can be rendered on another machine.
data2020 <- read_excel("D:/Semester 4/Analisis Multivariat/projek/2020.xlsx") %>% type_convert()
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Provinsi = col_character(),
##   `Melek Huruf (%)` = col_double(),
##   `Gini Ratio` = col_double()
## )
# Load the 2022 provincial indicators (same layout as the 2020 file).
# NOTE(review): absolute, machine-specific path - consider a project-relative path.
# NOTE(review): in the summaries printed further down, the 2022 `Gini Ratio`
# (about 0.49-0.90) and `Penduduk Miskin (%)` are roughly twice the 2020
# ranges - verify the 2022 source values before comparing the two years.
data2022 <- read_excel("D:/Semester 4/Analisis Multivariat/projek/2022.xlsx") %>% type_convert()
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Provinsi = col_character()
## )
str(data2020)
## tibble [34 × 12] (S3: tbl_df/tbl/data.frame)
##  $ Provinsi                     : chr [1:34] "Aceh" "Sumatera Utara" "Sumatera Barat" "Riau" ...
##  $ UMP (Rp/bulan)               : num [1:34] 3165030 2499422 2484041 2888563 2630161 ...
##  $ Melek Huruf (%)              : num [1:34] 98.2 99.2 99.2 99.2 98.2 ...
##  $ RLS (%)                      : num [1:34] 9.71 9.83 9.34 9.47 8.97 ...
##  $ IPM (%)                      : num [1:34] 72 71.8 72.4 72.7 71.3 ...
##  $ Penduduk Miskin (%)          : num [1:34] 14.99 8.75 6.28 6.82 7.58 ...
##  $ Gini Ratio                   : num [1:34] 0.32 0.32 0.31 0.33 0.32 0.34 0.33 0.33 0.26 0.34 ...
##  $ Pengeluaran Kapita (Rp/bulan): num [1:34] 1080171 1124253 1235050 1340446 1126690 ...
##  $ Konsumsi Protein (g/hari)    : num [1:34] 60.9 62.6 57.9 59.4 57.9 ...
##  $ Konsumsi Kalori (kkal/hari)  : num [1:34] 2091 2122 2110 2076 2081 ...
##  $ Estimasi Harapan Hidup       : num [1:34] 69.9 69.1 69.5 71.6 71.2 ...
##  $ TPT (%)                      : num [1:34] 11.65 10.96 10.76 11.12 7.58 ...
str(data2022)
## tibble [34 × 12] (S3: tbl_df/tbl/data.frame)
##  $ Provinsi                     : chr [1:34] "Aceh" "Sumatera Utara" "Sumatera Barat" "Riau" ...
##  $ UMP (Rp/bulan)               : num [1:34] 2224915 2405744 2536968 2596299 2393405 ...
##  $ Melek Huruf (%)              : num [1:34] 98.2 99.1 99.3 99.2 98.1 ...
##  $ RLS (%)                      : num [1:34] 9.44 9.71 9.18 9.22 8.68 ...
##  $ IPM (%)                      : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
##  $ Penduduk Miskin (%)          : num [1:34] 29.4 16.8 12 13.6 15.3 ...
##  $ Gini Ratio                   : num [1:34] 0.602 0.638 0.592 0.649 0.655 0.669 0.63 0.627 0.491 0.667 ...
##  $ Pengeluaran Kapita (Rp/bulan): num [1:34] 1180133 1216497 1342986 1425171 1261837 ...
##  $ Konsumsi Protein (g/hari)    : num [1:34] 61.2 64.7 59.2 59.7 60.7 ...
##  $ Konsumsi Kalori (kkal/hari)  : num [1:34] 2064 2123 2109 2022 2070 ...
##  $ Estimasi Harapan Hidup       : num [1:34] 70.2 69.6 69.9 72 71.5 ...
##  $ TPT (%)                      : num [1:34] 5.97 5.47 6.17 4.4 4.7 4.74 3.39 4.31 4.18 8.02 ...

Statistik Deskriptif Data 2020 dan 2022

summary(data2020)
##    Provinsi         UMP (Rp/bulan)    Melek Huruf (%)    RLS (%)      
##  Length:34          Min.   :1765608   Min.   :77.90   Min.   : 6.960  
##  Class :character   1st Qu.:2407604   1st Qu.:94.85   1st Qu.: 8.495  
##  Mode  :character   Median :2595930   Median :98.14   Median : 9.145  
##                     Mean   :2676536   Mean   :96.35   Mean   : 9.082  
##                     3rd Qu.:3004238   3rd Qu.:98.92   3rd Qu.: 9.650  
##                     Max.   :4416187   Max.   :99.79   Max.   :11.170  
##     IPM (%)      Penduduk Miskin (%)   Gini Ratio    
##  Min.   :60.44   Min.   : 3.780      Min.   :0.2600  
##  1st Qu.:69.50   1st Qu.: 6.405      1st Qu.:0.3225  
##  Median :71.42   Median : 8.735      Median :0.3400  
##  Mean   :71.08   Mean   :10.427      Mean   :0.3491  
##  3rd Qu.:72.31   3rd Qu.:12.855      3rd Qu.:0.3775  
##  Max.   :80.77   Max.   :26.640      Max.   :0.4300  
##  Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari)
##  Min.   : 794361               Min.   :46.52            
##  1st Qu.:1065357               1st Qu.:57.76            
##  Median :1133383               Median :60.57            
##  Mean   :1252578               Mean   :60.97            
##  3rd Qu.:1381372               3rd Qu.:64.65            
##  Max.   :2257991               Max.   :73.66            
##  Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup    TPT (%)      
##  Min.   :1772                Min.   :65.06          Min.   : 2.790  
##  1st Qu.:2025                1st Qu.:68.67          1st Qu.: 6.772  
##  Median :2086                Median :69.96          Median : 8.480  
##  Mean   :2078                Mean   :70.04          Mean   : 9.116  
##  3rd Qu.:2141                3rd Qu.:71.53          3rd Qu.:11.165  
##  Max.   :2442                Max.   :74.99          Max.   :15.820
summary(data2022)
##    Provinsi         UMP (Rp/bulan)    Melek Huruf (%)    RLS (%)      
##  Length:34          Min.   :2010212   Min.   :81.19   Min.   : 7.020  
##  Class :character   1st Qu.:2400474   1st Qu.:95.25   1st Qu.: 8.088  
##  Mode  :character   Median :2641549   Median :98.13   Median : 8.835  
##                     Mean   :2875905   Mean   :96.69   Mean   : 8.839  
##                     3rd Qu.:3194514   3rd Qu.:98.92   3rd Qu.: 9.360  
##                     Max.   :5589155   Max.   :99.81   Max.   :11.310  
##     IPM (%)      Penduduk Miskin (%)   Gini Ratio    
##  Min.   :61.39   Min.   : 9.06       Min.   :0.4910  
##  1st Qu.:70.23   1st Qu.:12.64       1st Qu.:0.6262  
##  Median :72.19   Median :17.02       Median :0.6680  
##  Mean   :71.97   Mean   :20.54       Mean   :0.6871  
##  3rd Qu.:73.22   3rd Qu.:24.43       3rd Qu.:0.7415  
##  Max.   :81.65   Max.   :53.36       Max.   :0.8980  
##  Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari)
##  Min.   : 884102               Min.   :45.07            
##  1st Qu.:1144451               1st Qu.:58.45            
##  Median :1234662               Median :60.93            
##  Mean   :1338086               Mean   :61.42            
##  3rd Qu.:1447032               3rd Qu.:64.66            
##  Max.   :2525347               Max.   :74.82            
##  Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup    TPT (%)     
##  Min.   :1837                Min.   :65.63          Min.   :3.110  
##  1st Qu.:1962                1st Qu.:68.98          1st Qu.:3.985  
##  Median :2040                Median :70.44          Median :4.775  
##  Mean   :2046                Mean   :70.42          Mean   :5.123  
##  3rd Qu.:2116                3rd Qu.:71.90          3rd Qu.:5.923  
##  Max.   :2460                Max.   :75.08          Max.   :8.530

Visualisasi Distribusi Data dengan Histogram

# Histograms of every indicator, one facet per variable.
# melt() stacks all non-id columns into `variable` / `value` pairs.
data2020_long <- melt(data2020, id.vars = "Provinsi")
ggplot(data = data2020_long, mapping = aes(x = value)) +
  geom_histogram(fill = "skyblue", bins = 30) +
  facet_wrap(vars(variable), scales = "free") +
  labs(title = "Histogram Data 2020")

data2022_long <- melt(data2022, id.vars = "Provinsi")
ggplot(data = data2022_long, mapping = aes(x = value)) +
  geom_histogram(fill = "salmon", bins = 30) +
  facet_wrap(vars(variable), scales = "free") +
  labs(title = "Histogram Data 2022")

Korelasi Antar Variabel

# Correlation heatmaps of the indicators; column 1 (Provinsi) is excluded
# because it is not numeric. Each year's heatmap is drawn exactly once.
pheatmap(cor(data2020[, -1], use = "complete.obs"), main = "Korelasi 2020")

pheatmap(cor(data2022[, -1], use = "complete.obs"), main = "Korelasi 2022")

Deteksi Missing Value

colSums(is.na(data2020))
##                      Provinsi                UMP (Rp/bulan) 
##                             0                             0 
##               Melek Huruf (%)                       RLS (%) 
##                             0                             0 
##                       IPM (%)           Penduduk Miskin (%) 
##                             0                             0 
##                    Gini Ratio Pengeluaran Kapita (Rp/bulan) 
##                             0                             0 
##     Konsumsi Protein (g/hari)   Konsumsi Kalori (kkal/hari) 
##                             0                             0 
##        Estimasi Harapan Hidup                       TPT (%) 
##                             0                             0
colSums(is.na(data2022))
##                      Provinsi                UMP (Rp/bulan) 
##                             0                             0 
##               Melek Huruf (%)                       RLS (%) 
##                             0                             0 
##                       IPM (%)           Penduduk Miskin (%) 
##                             0                             0 
##                    Gini Ratio Pengeluaran Kapita (Rp/bulan) 
##                             0                             0 
##     Konsumsi Protein (g/hari)   Konsumsi Kalori (kkal/hari) 
##                             0                             0 
##        Estimasi Harapan Hidup                       TPT (%) 
##                             0                             0

Deteksi dan Penanganan Data Outlier

# Boxplots of the raw indicators, one facet per variable with its own y scale.
# The constant x value yields a single box per facet; its tick label is hidden.
ggplot(data = data2020_long, mapping = aes(y = value, x = "")) +
  geom_boxplot(fill = "lightblue") +
  facet_wrap(vars(variable), scales = "free_y") +
  labs(title = "Boxplot Data 2020") +
  theme(axis.text.x = element_blank())

ggplot(data = data2022_long, mapping = aes(y = value, x = "")) +
  geom_boxplot(fill = "lightcoral") +
  facet_wrap(vars(variable), scales = "free_y") +
  labs(title = "Boxplot Data 2022") +
  theme(axis.text.x = element_blank())

#' Count IQR-rule outliers in every numeric column
#'
#' A value counts as an outlier when it lies below Q1 - 1.5 * IQR or above
#' Q3 + 1.5 * IQR of its own column. Missing values are ignored.
#'
#' @param df A data frame (or tibble). Non-numeric columns are skipped.
#' @return A named integer vector with one count per numeric column
#'   (length zero when `df` has no numeric column).
outlier_count <- function(df) {
  numeric_cols <- df[vapply(df, is.numeric, logical(1))]
  # vapply() guarantees an integer vector even when there are no numeric
  # columns, where sapply() would silently return an empty list.
  vapply(numeric_cols, function(x) {
    quartiles <- quantile(x, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
    fence <- 1.5 * diff(quartiles)
    sum(x < quartiles[1] - fence | x > quartiles[2] + fence, na.rm = TRUE)
  }, integer(1))
}

cat("Outlier 2020:\n")
## Outlier 2020:
print(outlier_count(data2020))
##                UMP (Rp/bulan)               Melek Huruf (%) 
##                             1                             2 
##                       RLS (%)                       IPM (%) 
##                             0                             5 
##           Penduduk Miskin (%)                    Gini Ratio 
##                             1                             0 
## Pengeluaran Kapita (Rp/bulan)     Konsumsi Protein (g/hari) 
##                             1                             1 
##   Konsumsi Kalori (kkal/hari)        Estimasi Harapan Hidup 
##                             3                             0 
##                       TPT (%) 
##                             0
cat("Outlier 2022:\n")
## Outlier 2022:
print(outlier_count(data2022))
##                UMP (Rp/bulan)               Melek Huruf (%) 
##                             1                             2 
##                       RLS (%)                       IPM (%) 
##                             1                             3 
##           Penduduk Miskin (%)                    Gini Ratio 
##                             2                             0 
## Pengeluaran Kapita (Rp/bulan)     Konsumsi Protein (g/hari) 
##                             1                             2 
##   Konsumsi Kalori (kkal/hari)        Estimasi Harapan Hidup 
##                             1                             0 
##                       TPT (%) 
##                             0
#' Winsorize numeric columns at their IQR fences
#'
#' Every numeric column is clamped to the interval
#' [Q1 - k * IQR, Q3 + k * IQR] computed from that column (missing values are
#' ignored when computing the quartiles and stay missing in the result).
#' Non-numeric columns are returned untouched.
#'
#' @param df A data frame (or tibble).
#' @param k Non-negative fence multiplier. The default of 1.5 reproduces the
#'   conventional boxplot rule.
#' @return `df` with the same columns, numeric values clamped to the fences.
handle_outliers <- function(df, k = 1.5) {
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 0)
  for (col in names(df)) {
    values <- df[[col]]
    if (!is.numeric(values)) {
      next
    }
    quartiles <- quantile(values, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
    fence <- k * diff(quartiles)
    df[[col]] <- pmin(pmax(values, quartiles[1] - fence), quartiles[2] + fence)
  }
  df
}

# Winsorize both years. Column 1 (Provinsi) is dropped first, so the cleaned
# tables hold only the numeric indicators, in the original row order.
data2020_clean <- handle_outliers(data2020[,-1])
data2022_clean <- handle_outliers(data2022[,-1])

Visualisasi Setelah Penanganan Outlier

# Boxplots after winsorizing: re-attach the province names, reshape to long
# format, then draw one facet per indicator.
cbind(Provinsi = data2020$Provinsi, data2020_clean) %>%
  melt(id.vars = "Provinsi") %>%
  ggplot(mapping = aes(y = value, x = "")) +
  geom_boxplot(fill = "lightblue") +
  facet_wrap(vars(variable), scales = "free_y") +
  labs(title = "Setelah Outlier Handling 2020") +
  theme(axis.text.x = element_blank())

cbind(Provinsi = data2022$Provinsi, data2022_clean) %>%
  melt(id.vars = "Provinsi") %>%
  ggplot(mapping = aes(y = value, x = "")) +
  geom_boxplot(fill = "lightcoral") +
  facet_wrap(vars(variable), scales = "free_y") +
  labs(title = "Setelah Outlier Handling 2022") +
  theme(axis.text.x = element_blank())

Standarisasi Data (Z-Score)

# Standardize every column (center and scale via scale()) and hand the
# result back as a data frame instead of a matrix.
scale_z <- function(df) {
  z_matrix <- scale(df)
  as.data.frame(z_matrix)
}

# Standardize the winsorized indicators to z-scores. scale() returns a numeric
# matrix, so the *_scaled objects are matrices rather than data frames.
# NOTE(review): the scale_z() helper defined just above is not used here -
# confirm whether that is intentional.
data2020_scaled <- scale(data2020_clean)
data2022_scaled <- scale(data2022_clean)

Clustering: KMeans, Fuzzy C-Means, dan DBSCAN

# --- K-Means with 2, 4 and 6 centers ---------------------------------------
# The seed is reset before each pair of fits. Within a pair, the 2022 fit
# continues from the RNG state left by the 2020 fit, so the order of these
# statements affects the resulting partitions.
# NOTE(review): no nstart is passed, so each fit uses kmeans()'s default
# number of random starts - consider a larger nstart for more stable results.
set.seed(123)
km_2020_2 <- kmeans(data2020_scaled, centers = 2)
km_2022_2 <- kmeans(data2022_scaled, centers = 2)
set.seed(123)
km_2020_4 <- kmeans(data2020_scaled, centers = 4)
km_2022_4 <- kmeans(data2022_scaled, centers = 4)
set.seed(123)
km_2020_6 <- kmeans(data2020_scaled, centers = 6)
km_2022_6 <- kmeans(data2022_scaled, centers = 6)
# --- Fuzzy C-Means with 2, 4 and 6 centers ---------------------------------
# fcm() is presumably ppclust::fcm (ppclust is attached above) - confirm.
# NOTE(review): no seed is set in this part; if fcm() initializes randomly,
# these fits depend on the RNG state left by the K-Means calls - verify.
fcm_2020_2 <- fcm(data2020_scaled, centers = 2)
fcm_2022_2 <- fcm(data2022_scaled, centers = 2)
fcm_2020_4 <- fcm(data2020_scaled, centers = 4)
fcm_2022_4 <- fcm(data2022_scaled, centers = 4)
fcm_2020_6 <- fcm(data2020_scaled, centers = 6)
fcm_2022_6 <- fcm(data2022_scaled, centers = 6)
# --- DBSCAN -----------------------------------------------------------------
# eps is fixed at 3.5 for every fit. For the db_* objects the _2/_4/_6 suffix
# is the minPts value, not a number of clusters.
db_2020_2 <- dbscan(data2020_scaled, eps = 3.5, minPts = 2)
db_2022_2 <- dbscan(data2022_scaled, eps = 3.5, minPts = 2)
db_2020_4 <- dbscan(data2020_scaled, eps = 3.5, minPts = 4)
db_2022_4 <- dbscan(data2022_scaled, eps = 3.5, minPts = 4)
db_2020_6 <- dbscan(data2020_scaled, eps = 3.5, minPts = 6)
db_2022_6 <- dbscan(data2022_scaled, eps = 3.5, minPts = 6)

Visualisasi Hasil Clustering dengan PCA dan Biplot

# PCA of each year's standardized matrix; these fits feed the biplots below.
# No scaling argument is passed because the inputs were already standardized
# with scale().
pca_2020 <- prcomp(data2020_scaled)
pca_2022 <- prcomp(data2022_scaled)

KMeans

2 Cluster

# PCA biplots coloured by K-Means membership (centers = 2), one per year.
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(km_2020_2$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA KMeans 2020"
)

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(km_2022_2$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA KMeans 2022"
)

4 Cluster

# PCA biplots coloured by K-Means membership (centers = 4), one per year.
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(km_2020_4$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA KMeans 2020"
)
## Too few points to calculate an ellipse

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(km_2022_4$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA KMeans 2022"
)

6 Cluster

# PCA biplots coloured by K-Means membership (centers = 6), one per year.
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(km_2020_6$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA KMeans 2020"
)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(km_2022_6$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA KMeans 2022"
)
## Too few points to calculate an ellipse

FCM

2 Cluster

# PCA biplots coloured by the Fuzzy C-Means cluster labels (centers = 2).
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(fcm_2020_2$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA FCM 2020"
)

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(fcm_2022_2$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA FCM 2022"
)

4 Cluster

# PCA biplots coloured by the Fuzzy C-Means cluster labels (centers = 4).
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(fcm_2020_4$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA FCM 2020"
)

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(fcm_2022_4$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA FCM 2022"
)

6 Cluster

# PCA biplots coloured by the Fuzzy C-Means cluster labels (centers = 6).
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(fcm_2020_6$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA FCM 2020"
)
## Too few points to calculate an ellipse

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(fcm_2022_6$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA FCM 2022"
)

DBSCAN

2 Min

# PCA biplots coloured by the DBSCAN labels (eps = 3.5, minPts = 2).
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(db_2020_2$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA DBSCAN 2020"
)
## Too few points to calculate an ellipse

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(db_2022_2$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA DBSCAN 2022"
)
## Too few points to calculate an ellipse

4 Min

# PCA biplots coloured by the DBSCAN labels (eps = 3.5, minPts = 4).
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(db_2020_4$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA DBSCAN 2020"
)
## Too few points to calculate an ellipse

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(db_2022_4$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA DBSCAN 2022"
)
## Too few points to calculate an ellipse

6 Min

# PCA biplots coloured by the DBSCAN labels (eps = 3.5, minPts = 6).
fviz_pca_biplot(
  pca_2020,
  habillage = as.factor(db_2020_6$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA DBSCAN 2020"
)

fviz_pca_biplot(
  pca_2022,
  habillage = as.factor(db_2022_6$cluster),
  label = "var", addEllipses = TRUE,
  title = "PCA DBSCAN 2022"
)

Evaluasi Cluster dengan Silhouette

KMeans

2 Cluster

# Silhouette plots for K-Means (centers = 2), using Euclidean distances
# between the standardized observations.
plot(
  silhouette(km_2020_2$cluster, dist(data2020_scaled)),
  main = "Silhouette KMeans 2020"
)

plot(
  silhouette(km_2022_2$cluster, dist(data2022_scaled)),
  main = "Silhouette KMeans 2022"
)

4 Cluster

# Silhouette plots for K-Means (centers = 4), using Euclidean distances
# between the standardized observations.
plot(
  silhouette(km_2020_4$cluster, dist(data2020_scaled)),
  main = "Silhouette KMeans 2020"
)

plot(
  silhouette(km_2022_4$cluster, dist(data2022_scaled)),
  main = "Silhouette KMeans 2022"
)

6 Cluster

# Silhouette plots for K-Means (centers = 6), using Euclidean distances
# between the standardized observations.
plot(
  silhouette(km_2020_6$cluster, dist(data2020_scaled)),
  main = "Silhouette KMeans 2020"
)

plot(
  silhouette(km_2022_6$cluster, dist(data2022_scaled)),
  main = "Silhouette KMeans 2022"
)

FCM

2 Cluster

# Silhouette plots for the Fuzzy C-Means cluster labels (centers = 2).
plot(
  silhouette(fcm_2020_2$cluster, dist(data2020_scaled)),
  main = "Silhouette FCM 2020"
)

plot(
  silhouette(fcm_2022_2$cluster, dist(data2022_scaled)),
  main = "Silhouette FCM 2022"
)

4 Cluster

# Silhouette plots for the Fuzzy C-Means cluster labels (centers = 4).
plot(
  silhouette(fcm_2020_4$cluster, dist(data2020_scaled)),
  main = "Silhouette FCM 2020"
)

plot(
  silhouette(fcm_2022_4$cluster, dist(data2022_scaled)),
  main = "Silhouette FCM 2022"
)

6 Cluster

# Silhouette plots for the Fuzzy C-Means cluster labels (centers = 6).
plot(
  silhouette(fcm_2020_6$cluster, dist(data2020_scaled)),
  main = "Silhouette FCM 2020"
)

plot(
  silhouette(fcm_2022_6$cluster, dist(data2022_scaled)),
  main = "Silhouette FCM 2022"
)

DBSCAN

2 Min

# Silhouette for DBSCAN (eps = 3.5, minPts = 2).
# dbscan() labels noise points with cluster 0. Noise is not a cluster, so it
# is removed before both the "more than one cluster" check and the silhouette
# computation; otherwise "one cluster + noise" would be scored as two clusters.
clustered_2020_2 <- db_2020_2$cluster != 0
if (length(unique(db_2020_2$cluster[clustered_2020_2])) > 1) {
  plot(
    silhouette(
      db_2020_2$cluster[clustered_2020_2],
      dist(data2020_scaled[clustered_2020_2, , drop = FALSE])
    ),
    main = "Silhouette DBSCAN 2020"
  )
} else {
  cat("DBSCAN 2020: hanya 1 cluster, silhouette tidak dihitung.\n")
}

clustered_2022_2 <- db_2022_2$cluster != 0
if (length(unique(db_2022_2$cluster[clustered_2022_2])) > 1) {
  plot(
    silhouette(
      db_2022_2$cluster[clustered_2022_2],
      dist(data2022_scaled[clustered_2022_2, , drop = FALSE])
    ),
    main = "Silhouette DBSCAN 2022"
  )
} else {
  cat("DBSCAN 2022: hanya 1 cluster, silhouette tidak dihitung.\n")
}

4 Min

# Silhouette for DBSCAN (eps = 3.5, minPts = 4).
# dbscan() labels noise points with cluster 0. Noise is not a cluster, so it
# is removed before both the "more than one cluster" check and the silhouette
# computation; otherwise "one cluster + noise" would be scored as two clusters.
clustered_2020_4 <- db_2020_4$cluster != 0
if (length(unique(db_2020_4$cluster[clustered_2020_4])) > 1) {
  plot(
    silhouette(
      db_2020_4$cluster[clustered_2020_4],
      dist(data2020_scaled[clustered_2020_4, , drop = FALSE])
    ),
    main = "Silhouette DBSCAN 2020"
  )
} else {
  cat("DBSCAN 2020: hanya 1 cluster, silhouette tidak dihitung.\n")
}

clustered_2022_4 <- db_2022_4$cluster != 0
if (length(unique(db_2022_4$cluster[clustered_2022_4])) > 1) {
  plot(
    silhouette(
      db_2022_4$cluster[clustered_2022_4],
      dist(data2022_scaled[clustered_2022_4, , drop = FALSE])
    ),
    main = "Silhouette DBSCAN 2022"
  )
} else {
  cat("DBSCAN 2022: hanya 1 cluster, silhouette tidak dihitung.\n")
}

6 Min

# Silhouette for DBSCAN (eps = 3.5, minPts = 6).
# dbscan() labels noise points with cluster 0. Noise is not a cluster, so it
# is removed before both the "more than one cluster" check and the silhouette
# computation; otherwise "one cluster + noise" would be scored as two clusters.
clustered_2020_6 <- db_2020_6$cluster != 0
if (length(unique(db_2020_6$cluster[clustered_2020_6])) > 1) {
  plot(
    silhouette(
      db_2020_6$cluster[clustered_2020_6],
      dist(data2020_scaled[clustered_2020_6, , drop = FALSE])
    ),
    main = "Silhouette DBSCAN 2020"
  )
} else {
  cat("DBSCAN 2020: hanya 1 cluster, silhouette tidak dihitung.\n")
}

clustered_2022_6 <- db_2022_6$cluster != 0
if (length(unique(db_2022_6$cluster[clustered_2022_6])) > 1) {
  plot(
    silhouette(
      db_2022_6$cluster[clustered_2022_6],
      dist(data2022_scaled[clustered_2022_6, , drop = FALSE])
    ),
    main = "Silhouette DBSCAN 2022"
  )
} else {
  cat("DBSCAN 2022: hanya 1 cluster, silhouette tidak dihitung.\n")
}


Hasil Cluster dan Profil Cluster

KMeans

2 Cluster

table(KMeans2020 = km_2020_2$cluster)
## KMeans2020
##  1  2 
## 17 17
table(KMeans2022 = km_2022_2$cluster)
## KMeans2022
##  1  2 
##  8 26
# Put each province's K-Means (centers = 2) label for 2020 and 2022 side by
# side. Cluster numbers are assigned independently per fit, so an equal number
# in both years does not by itself mean "the same group".
hasil_2020_2 <- data.frame(
  Provinsi = data2020$Provinsi,
  Cluster = km_2020_2$cluster
)
hasil_2022_2 <- data.frame(
  Provinsi = data2022$Provinsi,
  Cluster = km_2022_2$cluster
)

# Join on the province name; the suffixes tell the two Cluster columns apart.
gabung <- merge(
  hasil_2020_2, hasil_2022_2,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            2            2
## 2                       Bali            2            1
## 3                     Banten            2            1
## 4                   Bengkulu            1            2
## 5              DI Yogyakarta            2            1
## 6                DKI Jakarta            2            1
## 7                  Gorontalo            1            2
## 8                      Jambi            2            2
## 9                 Jawa Barat            2            1
## 10               Jawa Tengah            1            2
## 11                Jawa Timur            1            2
## 12          Kalimantan Barat            1            2
## 13        Kalimantan Selatan            2            2
## 14         Kalimantan Tengah            2            2
## 15          Kalimantan Timur            2            1
## 16          Kalimantan Utara            2            2
## 17 Kepulauan Bangka Belitung            2            2
## 18            Kepulauan Riau            2            1
## 19                   Lampung            1            2
## 20                    Maluku            1            2
## 21              Maluku Utara            1            2
## 22       Nusa Tenggara Barat            1            2
## 23       Nusa Tenggara Timur            1            2
## 24                     Papua            1            2
## 25               Papua Barat            1            2
## 26                      Riau            2            2
## 27            Sulawesi Barat            1            2
## 28          Sulawesi Selatan            1            2
## 29           Sulawesi Tengah            1            2
## 30         Sulawesi Tenggara            1            2
## 31            Sulawesi Utara            2            1
## 32            Sumatera Barat            2            2
## 33          Sumatera Selatan            1            2
## 34            Sumatera Utara            2            2
# Cluster profiles for K-Means (centers = 2): attach the labels to the
# original indicators (not the winsorized/standardized ones) so the means
# below are expressed in the indicators' natural units.
data2020_km_2 <- cbind(data2020[, -1], Cluster = km_2020_2$cluster)
data2022_km_2 <- cbind(data2022[, -1], Cluster = km_2022_2$cluster)

# Mean of every indicator per cluster; across() replaces the superseded
# summarise_all() and leaves the grouping column out automatically.
summary2020_km_2 <- data2020_km_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))

summary2022_km_2 <- data2022_km_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))
knitr::kable(summary2020_km_2)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2534854 94.45706 8.675882 68.64765 13.854706 0.3564706 1070797 57.50118 2019.751 68.79353 7.964118
C2 2818218 98.24941 9.488824 73.51412 6.998824 0.3417647 1434359 64.43882 2137.180 71.28824 10.267059
knitr::kable(summary2022_km_2)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 3589878 98.10250 9.791250 76.61 13.66375 0.7577500 1671399 65.10469 2099.495 72.76375 6.843750
C2 2656221 96.25115 8.546154 70.54 22.65885 0.6654231 1235528 60.28023 2029.122 69.69615 4.593462
# Cluster-profile heatmaps (K-Means, centers = 2): rows are clusters, columns
# are indicator means. Row names carry the cluster labels, and
# scale = "column" standardizes each indicator so that large-magnitude columns
# (e.g. the Rp/bulan amounts) do not dominate the colour scale.
profil_2020_km_2 <- as.matrix(summary2020_km_2[, -1])
rownames(profil_2020_km_2) <- summary2020_km_2$Cluster
pheatmap(
  profil_2020_km_2,
  scale = "column",
  main = "Profil Cluster KMeans - 2020",
  cluster_rows = FALSE
)

profil_2022_km_2 <- as.matrix(summary2022_km_2[, -1])
rownames(profil_2022_km_2) <- summary2022_km_2$Cluster
pheatmap(
  profil_2022_km_2,
  scale = "column",
  main = "Profil Cluster KMeans - 2022",
  cluster_rows = FALSE
)

4 Cluster

table(KMeans2020 = km_2020_4$cluster)
## KMeans2020
##  1  2  3  4 
##  3 18  5  8
table(KMeans2022 = km_2022_4$cluster)
## KMeans2022
##  1  2  3  4 
##  5  7 16  6
# Put each province's K-Means (centers = 4) label for 2020 and 2022 side by
# side. Cluster numbers are assigned independently per fit, so an equal number
# in both years does not by itself mean "the same group".
hasil_2020_4 <- data.frame(
  Provinsi = data2020$Provinsi,
  Cluster = km_2020_4$cluster
)
hasil_2022_4 <- data.frame(
  Provinsi = data2022$Provinsi,
  Cluster = km_2022_4$cluster
)

# Join on the province name; the suffixes tell the two Cluster columns apart.
gabung <- merge(
  hasil_2020_4, hasil_2022_4,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            2            3
## 2                       Bali            4            2
## 3                     Banten            4            4
## 4                   Bengkulu            2            3
## 5              DI Yogyakarta            4            2
## 6                DKI Jakarta            4            4
## 7                  Gorontalo            3            1
## 8                      Jambi            2            3
## 9                 Jawa Barat            4            4
## 10               Jawa Tengah            2            2
## 11                Jawa Timur            2            2
## 12          Kalimantan Barat            2            3
## 13        Kalimantan Selatan            2            3
## 14         Kalimantan Tengah            2            3
## 15          Kalimantan Timur            4            4
## 16          Kalimantan Utara            2            3
## 17 Kepulauan Bangka Belitung            2            3
## 18            Kepulauan Riau            4            4
## 19                   Lampung            2            3
## 20                    Maluku            1            3
## 21              Maluku Utara            1            3
## 22       Nusa Tenggara Barat            3            2
## 23       Nusa Tenggara Timur            3            1
## 24                     Papua            3            1
## 25               Papua Barat            1            1
## 26                      Riau            2            3
## 27            Sulawesi Barat            3            1
## 28          Sulawesi Selatan            2            2
## 29           Sulawesi Tengah            2            3
## 30         Sulawesi Tenggara            2            2
## 31            Sulawesi Utara            4            4
## 32            Sumatera Barat            2            3
## 33          Sumatera Selatan            2            3
## 34            Sumatera Utara            2            3
# Cluster profiles for K-Means (centers = 4): attach the labels to the
# original indicators (not the winsorized/standardized ones) so the means
# below are expressed in the indicators' natural units.
data2020_km_4 <- cbind(data2020[, -1], Cluster = km_2020_4$cluster)
data2022_km_4 <- cbind(data2022[, -1], Cluster = km_2022_4$cluster)

# Mean of every indicator per cluster; across() replaces the superseded
# summarise_all() and leaves the grouping column out automatically.
summary2020_km_4 <- data2020_km_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))

summary2022_km_4 <- data2022_km_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))
knitr::kable(summary2020_km_4)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2836905 98.57000 9.873333 67.69000 15.196667 0.3366667 1190586 51.63333 1816.087 66.77667 11.770000
C2 2635704 96.98444 8.938889 71.08167 9.293889 0.3316667 1172157 60.57889 2078.936 70.53389 8.521667
C3 2560943 90.14600 7.944000 65.73400 17.520000 0.3780000 1031176 58.31800 2084.818 66.48800 6.112000
C4 2780515 97.98125 9.820000 75.69250 6.753750 0.3750000 1595149 67.00875 2171.830 72.37625 11.333750
knitr::kable(summary2022_km_4)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2918778 93.14000 7.732000 65.98200 38.20000 0.7602000 1177166 55.04995 1959.405 66.86000 3.808000
C2 2471170 93.76143 8.655714 73.87571 20.27714 0.7630000 1232112 63.30735 2102.354 71.91429 4.665714
C3 2638739 98.19875 8.880625 71.50500 18.00750 0.6113750 1283506 60.80643 2025.864 70.13000 4.768750
C4 3944812 99.02333 9.865000 75.96667 12.89667 0.7398333 1741367 66.13659 2104.301 72.40500 7.696667
# Cluster-profile heatmaps (K-Means, centers = 4): rows are clusters, columns
# are indicator means. Row names carry the cluster labels, and
# scale = "column" standardizes each indicator so that large-magnitude columns
# (e.g. the Rp/bulan amounts) do not dominate the colour scale.
profil_2020_km_4 <- as.matrix(summary2020_km_4[, -1])
rownames(profil_2020_km_4) <- summary2020_km_4$Cluster
pheatmap(
  profil_2020_km_4,
  scale = "column",
  main = "Profil Cluster KMeans - 2020",
  cluster_rows = FALSE
)

profil_2022_km_4 <- as.matrix(summary2022_km_4[, -1])
rownames(profil_2022_km_4) <- summary2022_km_4$Cluster
pheatmap(
  profil_2022_km_4,
  scale = "column",
  main = "Profil Cluster KMeans - 2022",
  cluster_rows = FALSE
)

6 Cluster

table(KMeans2020 = km_2020_6$cluster)
## KMeans2020
##  1  2  3  4  5  6 
##  3  9  4  2 10  6
table(KMeans2022 = km_2022_6$cluster)
## KMeans2022
## 1 2 3 4 5 6 
## 6 6 8 2 8 4
# Put each province's K-Means (centers = 6) label for 2020 and 2022 side by
# side. Cluster numbers are assigned independently per fit, so an equal number
# in both years does not by itself mean "the same group".
hasil_2020_6 <- data.frame(
  Provinsi = data2020$Provinsi,
  Cluster = km_2020_6$cluster
)
hasil_2022_6 <- data.frame(
  Provinsi = data2022$Provinsi,
  Cluster = km_2022_6$cluster
)

# Join on the province name; the suffixes tell the two Cluster columns apart.
gabung <- merge(
  hasil_2020_6, hasil_2022_6,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            5            3
## 2                       Bali            4            1
## 3                     Banten            6            2
## 4                   Bengkulu            2            3
## 5              DI Yogyakarta            4            1
## 6                DKI Jakarta            6            2
## 7                  Gorontalo            2            6
## 8                      Jambi            5            5
## 9                 Jawa Barat            6            2
## 10               Jawa Tengah            2            1
## 11                Jawa Timur            2            1
## 12          Kalimantan Barat            2            3
## 13        Kalimantan Selatan            5            5
## 14         Kalimantan Tengah            5            5
## 15          Kalimantan Timur            6            2
## 16          Kalimantan Utara            5            3
## 17 Kepulauan Bangka Belitung            5            5
## 18            Kepulauan Riau            6            2
## 19                   Lampung            2            3
## 20                    Maluku            1            3
## 21              Maluku Utara            1            3
## 22       Nusa Tenggara Barat            3            4
## 23       Nusa Tenggara Timur            3            6
## 24                     Papua            3            6
## 25               Papua Barat            1            6
## 26                      Riau            5            5
## 27            Sulawesi Barat            3            4
## 28          Sulawesi Selatan            2            1
## 29           Sulawesi Tengah            2            3
## 30         Sulawesi Tenggara            2            1
## 31            Sulawesi Utara            6            2
## 32            Sumatera Barat            5            5
## 33          Sumatera Selatan            5            5
## 34            Sumatera Utara            5            5
# Cluster profiles for K-Means (centers = 6): attach the labels to the
# original indicators (not the winsorized/standardized ones) so the means
# below are expressed in the indicators' natural units.
data2020_km_6 <- cbind(data2020[, -1], Cluster = km_2020_6$cluster)
data2022_km_6 <- cbind(data2022[, -1], Cluster = km_2022_6$cluster)

# Mean of every indicator per cluster; across() replaces the superseded
# summarise_all() and leaves the grouping column out automatically.
summary2020_km_6 <- data2020_km_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))

summary2022_km_6 <- data2022_km_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))
knitr::kable(summary2020_km_6)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2836905 98.57000 9.873333 67.69000 15.196667 0.3366667 1190586 51.63333 1816.087 66.77667 11.770000
C2 2368987 95.48111 8.636667 70.43778 11.655556 0.3588889 1058017 58.88889 2042.296 70.54556 7.553333
C3 2554453 87.99500 7.865000 64.99750 18.095000 0.3700000 1021900 58.19250 2094.770 66.09250 5.887500
C4 2129567 94.94500 9.630000 77.73500 8.030000 0.4000000 1460819 67.78000 2219.840 73.56000 4.430000
C5 2870869 98.51400 9.143000 71.42100 7.761000 0.3150000 1264495 61.92400 2108.519 70.27700 9.242000
C6 2997498 98.99333 9.883333 75.01167 6.328333 0.3666667 1639925 66.75167 2155.827 71.98167 13.635000
# Render the 2022 per-cluster means (6-cluster KMeans) as a table
summary2022_km_6 %>%
  knitr::kable()
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2547997 94.56000 8.83000 74.61167 19.07333 0.7656667 1244020 61.38878 2042.673 72.72167 4.790000
C2 3944812 99.02333 9.86500 75.96667 12.89667 0.7398333 1741367 66.13659 2104.301 72.40500 7.696667
C3 2628997 97.65000 8.96375 70.73000 22.24500 0.6042500 1204563 57.85537 1942.029 69.83125 4.780000
C4 2087289 91.39500 7.84500 68.19000 25.58500 0.7400000 1058284 68.10980 2271.715 66.35000 3.515000
C5 2648481 98.74750 8.79750 72.28000 13.77000 0.6185000 1362450 63.75748 2109.698 70.42875 4.757500
C6 3107381 92.97000 7.64500 65.74750 41.83250 0.7670000 1232481 53.46222 1928.508 67.16750 3.982500
# Heatmaps of the per-cluster indicator means (6-cluster KMeans).
# The indicators live on very different scales (UMP is in millions, Gini
# Ratio is below 1), so each column is standardised; unscaled, the colour
# range is dictated by the largest-valued columns and the rest look flat.
# Rows are labelled with the cluster id, which is otherwise lost when the
# Cluster column is dropped from the matrix.
pheatmap(
  as.matrix(summary2020_km_6[, -1]),
  main = "Profil Cluster KMeans - 2020",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2020_km_6$Cluster
)

pheatmap(
  as.matrix(summary2022_km_6[, -1]),
  main = "Profil Cluster KMeans - 2022",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2022_km_6$Cluster
)

Fuzzy C-Means

2 Cluster

# Cluster sizes: how many rows of the 2020 data carry each label in the
# 2-cluster Fuzzy C-Means result.
table(FuzzyCMeans2020 = fcm_2020_2$cluster)
## FuzzyCMeans2020
##  1  2 
## 17 17
# Cluster sizes: how many rows of the 2022 data carry each label in the
# 2-cluster Fuzzy C-Means result.
table(FuzzyCMeans2022 = fcm_2022_2$cluster)
## FuzzyCMeans2022
##  1  2 
## 16 18
# Province-to-cluster lookup for each year (2-cluster Fuzzy C-Means).
hasil_2020_2 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = fcm_2020_2[["cluster"]]
)
hasil_2022_2 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = fcm_2022_2[["cluster"]]
)

# One row per province with its 2020 and 2022 label side by side.
# Note: label numbers are arbitrary per fit, so equal numbers across the
# two years do not by themselves mean "same cluster".
gabung <- merge(
  x = hasil_2020_2,
  y = hasil_2022_2,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            2            2
## 2                       Bali            2            1
## 3                     Banten            2            1
## 4                   Bengkulu            1            2
## 5              DI Yogyakarta            2            1
## 6                DKI Jakarta            2            1
## 7                  Gorontalo            1            2
## 8                      Jambi            2            2
## 9                 Jawa Barat            2            1
## 10               Jawa Tengah            1            2
## 11                Jawa Timur            1            2
## 12          Kalimantan Barat            1            2
## 13        Kalimantan Selatan            2            1
## 14         Kalimantan Tengah            2            1
## 15          Kalimantan Timur            2            1
## 16          Kalimantan Utara            2            1
## 17 Kepulauan Bangka Belitung            2            1
## 18            Kepulauan Riau            2            1
## 19                   Lampung            1            2
## 20                    Maluku            1            2
## 21              Maluku Utara            1            2
## 22       Nusa Tenggara Barat            1            2
## 23       Nusa Tenggara Timur            1            2
## 24                     Papua            1            2
## 25               Papua Barat            1            2
## 26                      Riau            2            1
## 27            Sulawesi Barat            1            2
## 28          Sulawesi Selatan            1            1
## 29           Sulawesi Tengah            1            2
## 30         Sulawesi Tenggara            1            2
## 31            Sulawesi Utara            2            1
## 32            Sumatera Barat            2            1
## 33          Sumatera Selatan            1            2
## 34            Sumatera Utara            2            1
# Append the 2-cluster Fuzzy C-Means labels to the indicator columns
# (everything except the first column). Labels are matched to rows purely
# by position, so fail fast if a label vector does not have one entry per row.
stopifnot(
  length(fcm_2020_2$cluster) == nrow(data2020),
  length(fcm_2022_2$cluster) == nrow(data2022)
)
data2020_fcm_2 <- cbind(data2020[, -1], Cluster = fcm_2020_2$cluster)
data2022_fcm_2 <- cbind(data2022[, -1], Cluster = fcm_2022_2$cluster)

# Mean of every indicator within each cluster. across() replaces the
# superseded summarise_all(); the resulting columns and values are the same.
summary2020_fcm_2 <- data2020_fcm_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_fcm_2 <- data2022_fcm_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_fcm_2)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2534854 94.45706 8.675882 68.64765 13.854706 0.3564706 1070797 57.50118 2019.751 68.79353 7.964118
C2 2818218 98.24941 9.488824 73.51412 6.998824 0.3417647 1434359 64.43882 2137.180 71.28824 10.267059
# Render the 2022 per-cluster means (2-cluster Fuzzy C-Means) as a table
summary2022_fcm_2 %>%
  knitr::kable()
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 3199865 98.06687 9.347500 74.54563 13.20125 0.685625 1535808 64.30794 2084.838 71.70562 5.858750
C2 2587941 95.46000 8.387222 69.67722 27.06778 0.688500 1162332 58.84425 2010.873 69.27333 4.468889
# Heatmaps of the per-cluster indicator means (2-cluster Fuzzy C-Means).
# Columns are standardised because the indicators are on very different
# scales (UMP in millions vs. Gini Ratio below 1); unscaled, only the
# largest-valued columns show any colour contrast. Rows are labelled with
# the cluster id, which is otherwise lost when the Cluster column is dropped.
pheatmap(
  as.matrix(summary2020_fcm_2[, -1]),
  main = "Profil Cluster FCM - 2020",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2020_fcm_2$Cluster
)

pheatmap(
  as.matrix(summary2022_fcm_2[, -1]),
  main = "Profil Cluster FCM - 2022",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2022_fcm_2$Cluster
)

4 Cluster

# Cluster sizes: how many rows of the 2020 data carry each label in the
# 4-cluster Fuzzy C-Means result.
table(FuzzyCMeans2020 = fcm_2020_4$cluster)
## FuzzyCMeans2020
##  1  2  3  4 
##  9  7  8 10
# Cluster sizes: how many rows of the 2022 data carry each label in the
# 4-cluster Fuzzy C-Means result.
table(FuzzyCMeans2022 = fcm_2022_4$cluster)
## FuzzyCMeans2022
##  1  2  3  4 
##  9  8  6 11
# Province-to-cluster lookup for each year (4-cluster Fuzzy C-Means).
hasil_2020_4 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = fcm_2020_4[["cluster"]]
)
hasil_2022_4 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = fcm_2022_4[["cluster"]]
)

# One row per province with its 2020 and 2022 label side by side.
# Note: label numbers are arbitrary per fit, so equal numbers across the
# two years do not by themselves mean "same cluster".
gabung <- merge(
  x = hasil_2020_4,
  y = hasil_2022_4,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            1            4
## 2                       Bali            3            4
## 3                     Banten            3            3
## 4                   Bengkulu            2            1
## 5              DI Yogyakarta            3            1
## 6                DKI Jakarta            3            3
## 7                  Gorontalo            4            2
## 8                      Jambi            1            4
## 9                 Jawa Barat            3            3
## 10               Jawa Tengah            2            1
## 11                Jawa Timur            2            1
## 12          Kalimantan Barat            4            1
## 13        Kalimantan Selatan            1            4
## 14         Kalimantan Tengah            1            4
## 15          Kalimantan Timur            3            3
## 16          Kalimantan Utara            1            4
## 17 Kepulauan Bangka Belitung            1            4
## 18            Kepulauan Riau            3            3
## 19                   Lampung            4            1
## 20                    Maluku            4            2
## 21              Maluku Utara            4            1
## 22       Nusa Tenggara Barat            2            2
## 23       Nusa Tenggara Timur            4            2
## 24                     Papua            4            2
## 25               Papua Barat            4            2
## 26                      Riau            1            4
## 27            Sulawesi Barat            4            2
## 28          Sulawesi Selatan            2            1
## 29           Sulawesi Tengah            4            2
## 30         Sulawesi Tenggara            2            1
## 31            Sulawesi Utara            3            3
## 32            Sumatera Barat            1            4
## 33          Sumatera Selatan            2            4
## 34            Sumatera Utara            1            4
# Append the 4-cluster Fuzzy C-Means labels to the indicator columns
# (everything except the first column). Labels are matched to rows purely
# by position, so fail fast if a label vector does not have one entry per row.
stopifnot(
  length(fcm_2020_4$cluster) == nrow(data2020),
  length(fcm_2022_4$cluster) == nrow(data2022)
)
data2020_fcm_4 <- cbind(data2020[, -1], Cluster = fcm_2020_4$cluster)
data2022_fcm_4 <- cbind(data2022[, -1], Cluster = fcm_2022_4$cluster)

# Mean of every indicator within each cluster. across() replaces the
# superseded summarise_all(); the resulting columns and values are the same.
summary2020_fcm_4 <- data2020_fcm_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_fcm_4 <- data2022_fcm_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_fcm_4)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2851731 98.48778 9.194444 71.57778 7.216667 0.3122222 1291435 62.15444 2106.380 70.32111 9.318889
C2 2403749 94.00571 8.675714 70.94571 11.982857 0.3657143 1061486 62.24143 2133.286 70.45714 7.570000
C3 2780515 97.98125 9.820000 75.69250 6.753750 0.3750000 1595149 67.00875 2171.830 72.37625 11.333750
C4 2626628 94.77300 8.676000 67.03900 15.165000 0.3500000 1077314 54.18300 1940.277 67.62900 8.240000
# Render the 2022 per-cluster means (4-cluster Fuzzy C-Means) as a table
summary2022_fcm_4 %>%
  knitr::kable()
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2527640 95.51333 8.612222 72.43778 20.38333 0.7043333 1183799 59.76360 1987.393 71.58000 4.604444
C2 2743538 94.01500 8.168750 67.48375 34.41625 0.7210000 1162500 57.63758 2011.908 67.09375 4.133750
C3 3944812 99.02333 9.865000 75.96667 12.89667 0.7398333 1741367 66.13659 2104.301 72.40500 7.696667
C4 2674076 98.31545 8.952727 72.66455 14.75273 0.6197273 1372048 62.93918 2085.956 70.80091 4.862727
# Heatmaps of the per-cluster indicator means (4-cluster Fuzzy C-Means).
# Columns are standardised because the indicators are on very different
# scales (UMP in millions vs. Gini Ratio below 1); unscaled, only the
# largest-valued columns show any colour contrast. Rows are labelled with
# the cluster id, which is otherwise lost when the Cluster column is dropped.
pheatmap(
  as.matrix(summary2020_fcm_4[, -1]),
  main = "Profil Cluster FCM - 2020",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2020_fcm_4$Cluster
)

pheatmap(
  as.matrix(summary2022_fcm_4[, -1]),
  main = "Profil Cluster FCM - 2022",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2022_fcm_4$Cluster
)

6 Cluster

# Cluster sizes: how many rows of the 2020 data carry each label in the
# 6-cluster Fuzzy C-Means result.
# NOTE(review): the printed output has no label 3, i.e. no row is assigned
# to cluster 3, so this fit effectively yields only 5 non-empty clusters.
table(FuzzyCMeans2020 = fcm_2020_6$cluster)
## FuzzyCMeans2020
##  1  2  4  5  6 
##  9  3  7  5 10
# Cluster sizes: how many rows of the 2022 data carry each label in the
# 6-cluster Fuzzy C-Means result.
table(FuzzyCMeans2022 = fcm_2022_6$cluster)
## FuzzyCMeans2022
## 1 2 3 4 5 6 
## 7 5 5 5 6 6
# Province-to-cluster lookup for each year (6-cluster Fuzzy C-Means).
hasil_2020_6 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = fcm_2020_6[["cluster"]]
)
hasil_2022_6 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = fcm_2022_6[["cluster"]]
)

# One row per province with its 2020 and 2022 label side by side.
# Note: label numbers are arbitrary per fit, so equal numbers across the
# two years do not by themselves mean "same cluster".
gabung <- merge(
  x = hasil_2020_6,
  y = hasil_2022_6,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            1            6
## 2                       Bali            1            3
## 3                     Banten            5            2
## 4                   Bengkulu            4            6
## 5              DI Yogyakarta            5            1
## 6                DKI Jakarta            5            2
## 7                  Gorontalo            6            4
## 8                      Jambi            1            3
## 9                 Jawa Barat            2            2
## 10               Jawa Tengah            4            1
## 11                Jawa Timur            4            1
## 12          Kalimantan Barat            6            1
## 13        Kalimantan Selatan            1            5
## 14         Kalimantan Tengah            1            5
## 15          Kalimantan Timur            5            2
## 16          Kalimantan Utara            1            3
## 17 Kepulauan Bangka Belitung            1            5
## 18            Kepulauan Riau            5            2
## 19                   Lampung            6            6
## 20                    Maluku            6            6
## 21              Maluku Utara            6            6
## 22       Nusa Tenggara Barat            4            1
## 23       Nusa Tenggara Timur            6            4
## 24                     Papua            6            4
## 25               Papua Barat            6            4
## 26                      Riau            2            3
## 27            Sulawesi Barat            6            4
## 28          Sulawesi Selatan            4            1
## 29           Sulawesi Tengah            6            6
## 30         Sulawesi Tenggara            4            1
## 31            Sulawesi Utara            2            5
## 32            Sumatera Barat            1            3
## 33          Sumatera Selatan            4            5
## 34            Sumatera Utara            1            5
# Append the 6-cluster Fuzzy C-Means labels to the indicator columns
# (everything except the first column). Labels are matched to rows purely
# by position, so fail fast if a label vector does not have one entry per row.
stopifnot(
  length(fcm_2020_6$cluster) == nrow(data2020),
  length(fcm_2022_6$cluster) == nrow(data2022)
)
data2020_fcm_6 <- cbind(data2020[, -1], Cluster = fcm_2020_6$cluster)
data2022_fcm_6 <- cbind(data2022[, -1], Cluster = fcm_2022_6$cluster)

# Mean of every indicator within each cluster. across() replaces the
# superseded summarise_all(); the resulting columns and values are the same.
# Only labels that actually occur get a row, so the 2020 table has no "C3"
# row when no observation is assigned to cluster 3.
summary2020_fcm_6 <- data2020_fcm_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_fcm_6 <- data2022_fcm_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_fcm_6)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2807838 97.99556 9.176667 71.88778 6.878889 0.3166667 1310238 62.95444 2130.487 70.38000 8.393333
C2 2886751 99.21667 9.390000 72.57667 7.440000 0.3666667 1292415 62.84333 2134.020 72.11000 12.706667
C4 2403749 94.00571 8.675714 70.94571 11.982857 0.3657143 1061486 62.24143 2133.286 70.45714 7.570000
C5 2795781 98.12600 10.110000 77.00400 6.950000 0.3720000 1742945 68.06800 2151.124 72.43000 12.176000
C6 2626628 94.77300 8.676000 67.03900 15.165000 0.3500000 1077314 54.18300 1940.277 67.62900 8.240000
# Render the 2022 per-cluster means (6-cluster Fuzzy C-Means) as a table
summary2022_fcm_6 %>%
  knitr::kable()
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C1 2464809 93.53714 8.398571 72.76000 20.91143 0.7487143 1202113 62.85260 2066.146 71.68857 4.668571
C2 4082839 98.86600 9.902000 76.39800 12.55200 0.7430000 1842300 65.75160 2094.250 72.47000 7.934000
C3 2705749 97.96200 9.148000 73.43800 12.72600 0.6326000 1408558 60.14844 2040.923 71.71000 4.946000
C4 2918778 93.14000 7.732000 65.98200 38.20000 0.7602000 1177166 55.04995 1959.405 66.86000 3.808000
C5 2819309 98.87000 8.830000 72.18833 13.98000 0.6293333 1351052 66.41396 2138.527 70.36000 4.883333
C6 2512405 98.25500 9.141667 70.89667 25.13167 0.6111667 1138950 57.48695 1964.342 69.17167 4.793333
# Heatmaps of the per-cluster indicator means (6-cluster Fuzzy C-Means).
# Columns are standardised because the indicators are on very different
# scales (UMP in millions vs. Gini Ratio below 1); unscaled, only the
# largest-valued columns show any colour contrast.
# Rows are labelled explicitly with the cluster id: the summary can skip a
# label (an empty cluster), so unlabelled rows would be misread as
# consecutive cluster numbers.
pheatmap(
  as.matrix(summary2020_fcm_6[, -1]),
  main = "Profil Cluster FCM - 2020",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2020_fcm_6$Cluster
)

pheatmap(
  as.matrix(summary2022_fcm_6[, -1]),
  main = "Profil Cluster FCM - 2022",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2022_fcm_6$Cluster
)

DBSCAN

2 Min

# Label counts for the DBSCAN result on the 2020 data ("2 Min" setting).
# Assuming db_2020_2 comes from dbscan::dbscan(), label 0 marks noise
# points rather than a cluster -- TODO confirm against where it is fitted.
table(DBSCAN2020 = db_2020_2$cluster)
## DBSCAN2020
##  0  1 
##  3 31
# Label counts for the DBSCAN result on the 2022 data ("2 Min" setting).
# Assuming db_2022_2 comes from dbscan::dbscan(), label 0 marks noise
# points rather than a cluster -- TODO confirm against where it is fitted.
table(DBSCAN2022 = db_2022_2$cluster)
## DBSCAN2022
##  0  1 
##  2 32
# Province-to-label lookup for each year (DBSCAN, "2 Min" setting).
# These names overwrite the hasil_2020_2 / hasil_2022_2 objects created in
# the Fuzzy C-Means section.
hasil_2020_2 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = db_2020_2[["cluster"]]
)
hasil_2022_2 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = db_2022_2[["cluster"]]
)

# One row per province with its 2020 and 2022 label side by side.
gabung <- merge(
  x = hasil_2020_2,
  y = hasil_2022_2,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            1            1
## 2                       Bali            1            1
## 3                     Banten            1            1
## 4                   Bengkulu            1            1
## 5              DI Yogyakarta            0            1
## 6                DKI Jakarta            1            1
## 7                  Gorontalo            1            1
## 8                      Jambi            1            1
## 9                 Jawa Barat            1            1
## 10               Jawa Tengah            1            1
## 11                Jawa Timur            1            1
## 12          Kalimantan Barat            1            1
## 13        Kalimantan Selatan            1            1
## 14         Kalimantan Tengah            1            1
## 15          Kalimantan Timur            1            1
## 16          Kalimantan Utara            1            1
## 17 Kepulauan Bangka Belitung            1            1
## 18            Kepulauan Riau            1            1
## 19                   Lampung            1            1
## 20                    Maluku            1            1
## 21              Maluku Utara            1            1
## 22       Nusa Tenggara Barat            0            0
## 23       Nusa Tenggara Timur            1            1
## 24                     Papua            0            0
## 25               Papua Barat            1            1
## 26                      Riau            1            1
## 27            Sulawesi Barat            1            1
## 28          Sulawesi Selatan            1            1
## 29           Sulawesi Tengah            1            1
## 30         Sulawesi Tenggara            1            1
## 31            Sulawesi Utara            1            1
## 32            Sumatera Barat            1            1
## 33          Sumatera Selatan            1            1
## 34            Sumatera Utara            1            1
# Append the DBSCAN labels ("2 Min" setting) to the indicator columns
# (everything except the first column). Labels are matched to rows purely
# by position, so fail fast if a label vector does not have one entry per row.
stopifnot(
  length(db_2020_2$cluster) == nrow(data2020),
  length(db_2022_2$cluster) == nrow(data2022)
)
data2020_db_2 <- cbind(data2020[, -1], Cluster = db_2020_2$cluster)
data2022_db_2 <- cbind(data2022[, -1], Cluster = db_2022_2$cluster)

# Mean of every indicator within each label. across() replaces the
# superseded summarise_all(); the resulting columns and values are the same.
# NOTE(review): if db_* come from dbscan::dbscan(), label 0 is noise, so the
# "C0" row is the average of the noise points, not a cluster profile.
summary2020_db_2 <- data2020_db_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_db_2 <- data2022_db_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_db_2)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C0 2488730 86.86333 8.330000 69.55333 17.630000 0.4000000 1270483 63.04333 2167.540 69.09667 6.410000
C1 2694711 97.27161 9.155161 71.22871 9.729677 0.3441935 1250845 60.76935 2069.845 70.13226 9.377419
# Render the 2022 per-label means (DBSCAN, "2 Min" setting) as a table
summary2022_db_2 %>%
  knitr::kable()
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C0 3195905 85.08000 7.315000 65.42500 40.43000 0.7730000 1307491 59.94264 2181.239 66.65000 3.760000
C1 2855905 97.41219 8.934375 72.37719 19.29938 0.6817812 1339998 61.50745 2037.208 70.65344 5.208125
# Heatmaps of the per-label indicator means (DBSCAN, "2 Min" setting).
# Columns are standardised because the indicators are on very different
# scales (UMP in millions vs. Gini Ratio below 1); unscaled, only the
# largest-valued columns show any colour contrast. Rows are labelled with
# the label id (C0/C1), which is otherwise lost when the Cluster column is
# dropped from the matrix.
pheatmap(
  as.matrix(summary2020_db_2[, -1]),
  main = "Profil Cluster DBSCAN - 2020",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2020_db_2$Cluster
)

pheatmap(
  as.matrix(summary2022_db_2[, -1]),
  main = "Profil Cluster DBSCAN - 2022",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2022_db_2$Cluster
)

4 Min

# Label counts for the DBSCAN result on the 2020 data ("4 Min" setting).
# Assuming db_2020_4 comes from dbscan::dbscan(), label 0 marks noise
# points rather than a cluster -- TODO confirm against where it is fitted.
table(DBSCAN2020 = db_2020_4$cluster)
## DBSCAN2020
##  0  1 
##  3 31
# Label counts for the DBSCAN result on the 2022 data ("4 Min" setting).
# Assuming db_2022_4 comes from dbscan::dbscan(), label 0 marks noise
# points rather than a cluster -- TODO confirm against where it is fitted.
table(DBSCAN2022 = db_2022_4$cluster)
## DBSCAN2022
##  0  1 
##  2 32
# Province-to-label lookup for each year (DBSCAN, "4 Min" setting).
# These names overwrite the hasil_2020_4 / hasil_2022_4 objects created in
# the Fuzzy C-Means section.
hasil_2020_4 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = db_2020_4[["cluster"]]
)
hasil_2022_4 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = db_2022_4[["cluster"]]
)

# One row per province with its 2020 and 2022 label side by side.
gabung <- merge(
  x = hasil_2020_4,
  y = hasil_2022_4,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            1            1
## 2                       Bali            1            1
## 3                     Banten            1            1
## 4                   Bengkulu            1            1
## 5              DI Yogyakarta            0            1
## 6                DKI Jakarta            1            1
## 7                  Gorontalo            1            1
## 8                      Jambi            1            1
## 9                 Jawa Barat            1            1
## 10               Jawa Tengah            1            1
## 11                Jawa Timur            1            1
## 12          Kalimantan Barat            1            1
## 13        Kalimantan Selatan            1            1
## 14         Kalimantan Tengah            1            1
## 15          Kalimantan Timur            1            1
## 16          Kalimantan Utara            1            1
## 17 Kepulauan Bangka Belitung            1            1
## 18            Kepulauan Riau            1            1
## 19                   Lampung            1            1
## 20                    Maluku            1            1
## 21              Maluku Utara            1            1
## 22       Nusa Tenggara Barat            0            0
## 23       Nusa Tenggara Timur            1            1
## 24                     Papua            0            0
## 25               Papua Barat            1            1
## 26                      Riau            1            1
## 27            Sulawesi Barat            1            1
## 28          Sulawesi Selatan            1            1
## 29           Sulawesi Tengah            1            1
## 30         Sulawesi Tenggara            1            1
## 31            Sulawesi Utara            1            1
## 32            Sumatera Barat            1            1
## 33          Sumatera Selatan            1            1
## 34            Sumatera Utara            1            1
# Append the DBSCAN labels ("4 Min" setting) to the indicator columns
# (everything except the first column). Labels are matched to rows purely
# by position, so fail fast if a label vector does not have one entry per row.
stopifnot(
  length(db_2020_4$cluster) == nrow(data2020),
  length(db_2022_4$cluster) == nrow(data2022)
)
data2020_db_4 <- cbind(data2020[, -1], Cluster = db_2020_4$cluster)
data2022_db_4 <- cbind(data2022[, -1], Cluster = db_2022_4$cluster)

# Mean of every indicator within each label. across() replaces the
# superseded summarise_all(); the resulting columns and values are the same.
# NOTE(review): if db_* come from dbscan::dbscan(), label 0 is noise, so the
# "C0" row is the average of the noise points, not a cluster profile.
summary2020_db_4 <- data2020_db_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_db_4 <- data2022_db_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_db_4)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C0 2488730 86.86333 8.330000 69.55333 17.630000 0.4000000 1270483 63.04333 2167.540 69.09667 6.410000
C1 2694711 97.27161 9.155161 71.22871 9.729677 0.3441935 1250845 60.76935 2069.845 70.13226 9.377419
# Render the 2022 per-label means (DBSCAN, "4 Min" setting) as a table
summary2022_db_4 %>%
  knitr::kable()
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C0 3195905 85.08000 7.315000 65.42500 40.43000 0.7730000 1307491 59.94264 2181.239 66.65000 3.760000
C1 2855905 97.41219 8.934375 72.37719 19.29938 0.6817812 1339998 61.50745 2037.208 70.65344 5.208125
# Heatmaps of the per-label indicator means (DBSCAN, "4 Min" setting).
# Columns are standardised because the indicators are on very different
# scales (UMP in millions vs. Gini Ratio below 1); unscaled, only the
# largest-valued columns show any colour contrast. Rows are labelled with
# the label id (C0/C1), which is otherwise lost when the Cluster column is
# dropped from the matrix.
pheatmap(
  as.matrix(summary2020_db_4[, -1]),
  main = "Profil Cluster DBSCAN - 2020",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2020_db_4$Cluster
)

pheatmap(
  as.matrix(summary2022_db_4[, -1]),
  main = "Profil Cluster DBSCAN - 2022",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2022_db_4$Cluster
)

6 Min

# Label counts for the DBSCAN result on the 2020 data ("6 Min" setting).
# Assuming db_2020_6 comes from dbscan::dbscan(), label 0 marks noise
# points rather than a cluster -- TODO confirm against where it is fitted.
table(DBSCAN2020 = db_2020_6$cluster)
## DBSCAN2020
##  0  1 
##  4 30
# Label counts for the DBSCAN result on the 2022 data ("6 Min" setting).
# Assuming db_2022_6 comes from dbscan::dbscan(), label 0 marks noise
# points rather than a cluster -- TODO confirm against where it is fitted.
table(DBSCAN2022 = db_2022_6$cluster)
## DBSCAN2022
##  0  1 
##  4 30
# Province-to-label lookup for each year (DBSCAN, "6 Min" setting).
# These names overwrite the hasil_2020_6 / hasil_2022_6 objects created in
# the Fuzzy C-Means section.
hasil_2020_6 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = db_2020_6[["cluster"]]
)
hasil_2022_6 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = db_2022_6[["cluster"]]
)

# One row per province with its 2020 and 2022 label side by side.
gabung <- merge(
  x = hasil_2020_6,
  y = hasil_2022_6,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
##                     Provinsi Cluster_2020 Cluster_2022
## 1                       Aceh            1            1
## 2                       Bali            1            1
## 3                     Banten            1            1
## 4                   Bengkulu            1            1
## 5              DI Yogyakarta            0            1
## 6                DKI Jakarta            1            0
## 7                  Gorontalo            1            1
## 8                      Jambi            1            1
## 9                 Jawa Barat            1            1
## 10               Jawa Tengah            1            1
## 11                Jawa Timur            1            1
## 12          Kalimantan Barat            1            1
## 13        Kalimantan Selatan            1            1
## 14         Kalimantan Tengah            1            1
## 15          Kalimantan Timur            1            1
## 16          Kalimantan Utara            1            1
## 17 Kepulauan Bangka Belitung            1            1
## 18            Kepulauan Riau            1            0
## 19                   Lampung            1            1
## 20                    Maluku            1            1
## 21              Maluku Utara            1            1
## 22       Nusa Tenggara Barat            0            0
## 23       Nusa Tenggara Timur            1            1
## 24                     Papua            0            0
## 25               Papua Barat            0            1
## 26                      Riau            1            1
## 27            Sulawesi Barat            1            1
## 28          Sulawesi Selatan            1            1
## 29           Sulawesi Tengah            1            1
## 30         Sulawesi Tenggara            1            1
## 31            Sulawesi Utara            1            1
## 32            Sumatera Barat            1            1
## 33          Sumatera Selatan            1            1
## 34            Sumatera Utara            1            1
# Append the DBSCAN labels ("6 Min" setting) to the indicator columns
# (everything except the first column). Labels are matched to rows purely
# by position, so fail fast if a label vector does not have one entry per row.
stopifnot(
  length(db_2020_6$cluster) == nrow(data2020),
  length(db_2022_6$cluster) == nrow(data2022)
)
data2020_db_6 <- cbind(data2020[, -1], Cluster = db_2020_6$cluster)
data2022_db_6 <- cbind(data2022[, -1], Cluster = db_2022_6$cluster)

# Mean of every indicator within each label. across() replaces the
# superseded summarise_all(); the resulting columns and values are the same.
# NOTE(review): if db_* come from dbscan::dbscan(), label 0 is noise, so the
# "C0" row is the average of the noise points, not a cluster profile.
summary2020_db_6 <- data2020_db_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_db_6 <- data2022_db_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_db_6)
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C0 2662604 89.52750 8.7475 68.43750 18.565000 0.395 1301616 61.07250 2101.332 68.32750 7.8675
C1 2678394 97.26333 9.1270 71.43333 9.341667 0.343 1246039 60.95633 2075.417 70.26933 9.2820
# Render the 2022 per-label means (DBSCAN, "6 Min" setting) as a table
summary2022_db_6 %>%
  knitr::kable()
Cluster UMP (Rp/bulan) Melek Huruf (%) RLS (%) IPM (%) Penduduk Miskin (%) Gini Ratio Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
C0 4028967 92.21750 9.077500 72.240 25.6075 0.7620000 1743008 63.17603 2132.304 69.28000 5.885000
C1 2722164 97.28267 8.807333 71.932 19.8670 0.6771667 1284096 61.18065 2034.130 70.56967 5.021333
# Heatmaps of the per-label indicator means (DBSCAN, "6 Min" setting).
# Columns are standardised because the indicators are on very different
# scales (UMP in millions vs. Gini Ratio below 1); unscaled, only the
# largest-valued columns show any colour contrast. Rows are labelled with
# the label id (C0/C1), which is otherwise lost when the Cluster column is
# dropped from the matrix.
pheatmap(
  as.matrix(summary2020_db_6[, -1]),
  main = "Profil Cluster DBSCAN - 2020",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2020_db_6$Cluster
)

pheatmap(
  as.matrix(summary2022_db_6[, -1]),
  main = "Profil Cluster DBSCAN - 2022",
  cluster_rows = FALSE,
  scale = "column",
  labels_row = summary2022_db_6$Cluster
)