Analisis segmentasi wilayah menggunakan data sosial ekonomi provinsi di Indonesia tahun 2020 dan 2022. Analisis dilakukan menggunakan metode clustering seperti KMeans, Fuzzy C-Means, dan DBSCAN, serta visualisasi hasil dan evaluasi cluster.
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.4.3
library(psych)
## Warning: package 'psych' was built under R version 4.4.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(pheatmap)
## Warning: package 'pheatmap' was built under R version 4.4.3
library(readr)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(cluster)
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.3
library(kernlab)
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:psych':
##
## alpha
## The following object is masked from 'package:ggplot2':
##
## alpha
library(ppclust)
## Warning: package 'ppclust' was built under R version 4.4.3
##
## Attaching package: 'ppclust'
## The following object is masked from 'package:psych':
##
## pca
library(dbscan)
## Warning: package 'dbscan' was built under R version 4.4.3
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:stats':
##
## as.dendrogram
# Load the 2020 provincial indicators from Excel, then pass the result through
# type_convert(); the column specification printed below lists the columns it
# examined (Provinsi, Melek Huruf (%), Gini Ratio).
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists.
data2020 <- read_excel("D:/Semester 4/Analisis Multivariat/projek/2020.xlsx") %>% type_convert()
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Provinsi = col_character(),
## `Melek Huruf (%)` = col_double(),
## `Gini Ratio` = col_double()
## )
# Load the 2022 provincial indicators from Excel, then pass the result through
# type_convert(); the column specification printed below shows that only
# Provinsi was examined for this file.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists.
data2022 <- read_excel("D:/Semester 4/Analisis Multivariat/projek/2022.xlsx") %>% type_convert()
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Provinsi = col_character()
## )
str(data2020)
## tibble [34 × 12] (S3: tbl_df/tbl/data.frame)
## $ Provinsi : chr [1:34] "Aceh" "Sumatera Utara" "Sumatera Barat" "Riau" ...
## $ UMP (Rp/bulan) : num [1:34] 3165030 2499422 2484041 2888563 2630161 ...
## $ Melek Huruf (%) : num [1:34] 98.2 99.2 99.2 99.2 98.2 ...
## $ RLS (%) : num [1:34] 9.71 9.83 9.34 9.47 8.97 ...
## $ IPM (%) : num [1:34] 72 71.8 72.4 72.7 71.3 ...
## $ Penduduk Miskin (%) : num [1:34] 14.99 8.75 6.28 6.82 7.58 ...
## $ Gini Ratio : num [1:34] 0.32 0.32 0.31 0.33 0.32 0.34 0.33 0.33 0.26 0.34 ...
## $ Pengeluaran Kapita (Rp/bulan): num [1:34] 1080171 1124253 1235050 1340446 1126690 ...
## $ Konsumsi Protein (g/hari) : num [1:34] 60.9 62.6 57.9 59.4 57.9 ...
## $ Konsumsi Kalori (kkal/hari) : num [1:34] 2091 2122 2110 2076 2081 ...
## $ Estimasi Harapan Hidup : num [1:34] 69.9 69.1 69.5 71.6 71.2 ...
## $ TPT (%) : num [1:34] 11.65 10.96 10.76 11.12 7.58 ...
str(data2022)
## tibble [34 × 12] (S3: tbl_df/tbl/data.frame)
## $ Provinsi : chr [1:34] "Aceh" "Sumatera Utara" "Sumatera Barat" "Riau" ...
## $ UMP (Rp/bulan) : num [1:34] 2224915 2405744 2536968 2596299 2393405 ...
## $ Melek Huruf (%) : num [1:34] 98.2 99.1 99.3 99.2 98.1 ...
## $ RLS (%) : num [1:34] 9.44 9.71 9.18 9.22 8.68 ...
## $ IPM (%) : num [1:34] 72.8 72.7 73.3 73.5 72.1 ...
## $ Penduduk Miskin (%) : num [1:34] 29.4 16.8 12 13.6 15.3 ...
## $ Gini Ratio : num [1:34] 0.602 0.638 0.592 0.649 0.655 0.669 0.63 0.627 0.491 0.667 ...
## $ Pengeluaran Kapita (Rp/bulan): num [1:34] 1180133 1216497 1342986 1425171 1261837 ...
## $ Konsumsi Protein (g/hari) : num [1:34] 61.2 64.7 59.2 59.7 60.7 ...
## $ Konsumsi Kalori (kkal/hari) : num [1:34] 2064 2123 2109 2022 2070 ...
## $ Estimasi Harapan Hidup : num [1:34] 70.2 69.6 69.9 72 71.5 ...
## $ TPT (%) : num [1:34] 5.97 5.47 6.17 4.4 4.7 4.74 3.39 4.31 4.18 8.02 ...
summary(data2020)
## Provinsi UMP (Rp/bulan) Melek Huruf (%) RLS (%)
## Length:34 Min. :1765608 Min. :77.90 Min. : 6.960
## Class :character 1st Qu.:2407604 1st Qu.:94.85 1st Qu.: 8.495
## Mode :character Median :2595930 Median :98.14 Median : 9.145
## Mean :2676536 Mean :96.35 Mean : 9.082
## 3rd Qu.:3004238 3rd Qu.:98.92 3rd Qu.: 9.650
## Max. :4416187 Max. :99.79 Max. :11.170
## IPM (%) Penduduk Miskin (%) Gini Ratio
## Min. :60.44 Min. : 3.780 Min. :0.2600
## 1st Qu.:69.50 1st Qu.: 6.405 1st Qu.:0.3225
## Median :71.42 Median : 8.735 Median :0.3400
## Mean :71.08 Mean :10.427 Mean :0.3491
## 3rd Qu.:72.31 3rd Qu.:12.855 3rd Qu.:0.3775
## Max. :80.77 Max. :26.640 Max. :0.4300
## Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari)
## Min. : 794361 Min. :46.52
## 1st Qu.:1065357 1st Qu.:57.76
## Median :1133383 Median :60.57
## Mean :1252578 Mean :60.97
## 3rd Qu.:1381372 3rd Qu.:64.65
## Max. :2257991 Max. :73.66
## Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
## Min. :1772 Min. :65.06 Min. : 2.790
## 1st Qu.:2025 1st Qu.:68.67 1st Qu.: 6.772
## Median :2086 Median :69.96 Median : 8.480
## Mean :2078 Mean :70.04 Mean : 9.116
## 3rd Qu.:2141 3rd Qu.:71.53 3rd Qu.:11.165
## Max. :2442 Max. :74.99 Max. :15.820
summary(data2022)
## Provinsi UMP (Rp/bulan) Melek Huruf (%) RLS (%)
## Length:34 Min. :2010212 Min. :81.19 Min. : 7.020
## Class :character 1st Qu.:2400474 1st Qu.:95.25 1st Qu.: 8.088
## Mode :character Median :2641549 Median :98.13 Median : 8.835
## Mean :2875905 Mean :96.69 Mean : 8.839
## 3rd Qu.:3194514 3rd Qu.:98.92 3rd Qu.: 9.360
## Max. :5589155 Max. :99.81 Max. :11.310
## IPM (%) Penduduk Miskin (%) Gini Ratio
## Min. :61.39 Min. : 9.06 Min. :0.4910
## 1st Qu.:70.23 1st Qu.:12.64 1st Qu.:0.6262
## Median :72.19 Median :17.02 Median :0.6680
## Mean :71.97 Mean :20.54 Mean :0.6871
## 3rd Qu.:73.22 3rd Qu.:24.43 3rd Qu.:0.7415
## Max. :81.65 Max. :53.36 Max. :0.8980
## Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari)
## Min. : 884102 Min. :45.07
## 1st Qu.:1144451 1st Qu.:58.45
## Median :1234662 Median :60.93
## Mean :1338086 Mean :61.42
## 3rd Qu.:1447032 3rd Qu.:64.66
## Max. :2525347 Max. :74.82
## Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup TPT (%)
## Min. :1837 Min. :65.63 Min. :3.110
## 1st Qu.:1962 1st Qu.:68.98 1st Qu.:3.985
## Median :2040 Median :70.44 Median :4.775
## Mean :2046 Mean :70.42 Mean :5.123
## 3rd Qu.:2116 3rd Qu.:71.90 3rd Qu.:5.923
## Max. :2460 Max. :75.08 Max. :8.530
# Reshape each year to long format (one row per province/indicator pair) and
# draw one histogram panel per indicator. The *_long objects are reused by the
# boxplots further down.
data2020_long <- melt(data2020, id.vars = "Provinsi")
ggplot(data = data2020_long, mapping = aes(x = value)) +
  geom_histogram(fill = "skyblue", bins = 30) +
  facet_wrap(~ variable, scales = "free") +
  labs(title = "Histogram Data 2020")

data2022_long <- melt(data2022, id.vars = "Provinsi")
ggplot(data = data2022_long, mapping = aes(x = value)) +
  geom_histogram(fill = "salmon", bins = 30) +
  facet_wrap(~ variable, scales = "free") +
  labs(title = "Histogram Data 2022")
# Correlation heatmaps of the indicators for each year; the first column
# (Provinsi) is dropped because it is not numeric. Each heatmap is drawn once
# (the original repeated both calls verbatim, producing duplicate figures).
pheatmap(cor(data2020[, -1], use = "complete.obs"), main = "Korelasi 2020")
pheatmap(cor(data2022[, -1], use = "complete.obs"), main = "Korelasi 2022")
colSums(is.na(data2020))
## Provinsi UMP (Rp/bulan)
## 0 0
## Melek Huruf (%) RLS (%)
## 0 0
## IPM (%) Penduduk Miskin (%)
## 0 0
## Gini Ratio Pengeluaran Kapita (Rp/bulan)
## 0 0
## Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari)
## 0 0
## Estimasi Harapan Hidup TPT (%)
## 0 0
colSums(is.na(data2022))
## Provinsi UMP (Rp/bulan)
## 0 0
## Melek Huruf (%) RLS (%)
## 0 0
## IPM (%) Penduduk Miskin (%)
## 0 0
## Gini Ratio Pengeluaran Kapita (Rp/bulan)
## 0 0
## Konsumsi Protein (g/hari) Konsumsi Kalori (kkal/hari)
## 0 0
## Estimasi Harapan Hidup TPT (%)
## 0 0
# Boxplots of the raw indicators, one panel per indicator with its own y-axis.
# The x aesthetic is a constant empty string, so its axis text is hidden.
ggplot(data = data2020_long, mapping = aes(x = "", y = value)) +
  geom_boxplot(fill = "lightblue") +
  facet_wrap(~ variable, scales = "free_y") +
  labs(title = "Boxplot Data 2020") +
  theme(axis.text.x = element_blank())

ggplot(data = data2022_long, mapping = aes(x = "", y = value)) +
  geom_boxplot(fill = "lightcoral") +
  facet_wrap(~ variable, scales = "free_y") +
  labs(title = "Boxplot Data 2022") +
  theme(axis.text.x = element_blank())
# Count IQR-rule outliers in every numeric column of a data frame.
#
# A value is an outlier when it lies below Q1 - 1.5 * (Q3 - Q1) or above
# Q3 + 1.5 * (Q3 - Q1). Missing values are ignored both when computing the
# quartiles and when counting.
#
# df:      a data frame (or tibble); non-numeric columns are skipped.
# returns: a named integer vector with one count per numeric column. When df
#          has no numeric column the result is a zero-length integer vector
#          (sapply() used to return an empty list in that case).
outlier_count <- function(df) {
  numeric_cols <- Filter(is.numeric, df)
  vapply(numeric_cols, function(x) {
    quartiles <- quantile(x, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
    # Named `spread` rather than `IQR` so stats::IQR() is not shadowed.
    spread <- quartiles[2] - quartiles[1]
    lower <- quartiles[1] - 1.5 * spread
    upper <- quartiles[2] + 1.5 * spread
    sum(x < lower | x > upper, na.rm = TRUE)
  }, integer(1))
}
cat("Outlier 2020:\n")
## Outlier 2020:
print(outlier_count(data2020))
## UMP (Rp/bulan) Melek Huruf (%)
## 1 2
## RLS (%) IPM (%)
## 0 5
## Penduduk Miskin (%) Gini Ratio
## 1 0
## Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari)
## 1 1
## Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup
## 3 0
## TPT (%)
## 0
cat("Outlier 2022:\n")
## Outlier 2022:
print(outlier_count(data2022))
## UMP (Rp/bulan) Melek Huruf (%)
## 1 2
## RLS (%) IPM (%)
## 1 3
## Penduduk Miskin (%) Gini Ratio
## 2 0
## Pengeluaran Kapita (Rp/bulan) Konsumsi Protein (g/hari)
## 1 2
## Konsumsi Kalori (kkal/hari) Estimasi Harapan Hidup
## 1 0
## TPT (%)
## 0
# Clip (winsorize) IQR-rule outliers in every numeric column of a data frame.
#
# Values below Q1 - 1.5 * (Q3 - Q1) are raised to that lower fence and values
# above Q3 + 1.5 * (Q3 - Q1) are lowered to the upper fence. Quartiles are
# computed with missing values removed; NA entries stay NA.
#
# df:      a data frame (or tibble); non-numeric columns are left untouched.
# returns: df with the same columns, numeric ones clipped to their fences.
handle_outliers <- function(df) {
  # Iterate by position, not by name: with duplicated column names df[[name]]
  # always resolves to the first match, so later duplicates were never clipped.
  for (i in seq_along(df)) {
    x <- df[[i]]
    if (!is.numeric(x)) {
      next
    }
    quartiles <- quantile(x, c(0.25, 0.75), na.rm = TRUE, names = FALSE)
    # Named `spread` rather than `IQR` so stats::IQR() is not shadowed.
    spread <- quartiles[2] - quartiles[1]
    lower <- quartiles[1] - 1.5 * spread
    upper <- quartiles[2] + 1.5 * spread
    df[[i]] <- pmin(pmax(x, lower), upper)
  }
  df
}
# Clip outliers in the indicator columns (Provinsi, column 1, is excluded),
# then re-attach Provinsi only for plotting to check the effect with boxplots.
data2020_clean <- handle_outliers(data2020[, -1])
data2022_clean <- handle_outliers(data2022[, -1])

cbind(Provinsi = data2020$Provinsi, data2020_clean) %>%
  melt(id.vars = "Provinsi") %>%
  ggplot(mapping = aes(x = "", y = value)) +
  geom_boxplot(fill = "lightblue") +
  facet_wrap(~ variable, scales = "free_y") +
  labs(title = "Setelah Outlier Handling 2020") +
  theme(axis.text.x = element_blank())

cbind(Provinsi = data2022$Provinsi, data2022_clean) %>%
  melt(id.vars = "Provinsi") %>%
  ggplot(mapping = aes(x = "", y = value)) +
  geom_boxplot(fill = "lightcoral") +
  facet_wrap(~ variable, scales = "free_y") +
  labs(title = "Setelah Outlier Handling 2022") +
  theme(axis.text.x = element_blank())
# Standardize every column of `df` with scale() and return the result as a
# data frame rather than the matrix that scale() produces.
# NOTE(review): the statements that follow call scale() directly; this helper
# is not used in the visible part of the script.
scale_z <- function(df) {
  standardized <- scale(df)
  as.data.frame(standardized)
}
# Standardize the outlier-clipped indicators; every model below is fitted on
# these scaled matrices.
data2020_scaled <- scale(data2020_clean)
data2022_scaled <- scale(data2022_clean)
# K-Means with k = 2, 4 and 6. The seed is reset before each pair of fits, so
# the 2022 fit of a pair starts from the RNG state left by the 2020 fit.
# No nstart is passed, so each fit uses kmeans()'s default number of starts.
set.seed(123)
km_2020_2 <- kmeans(data2020_scaled, centers = 2)
km_2022_2 <- kmeans(data2022_scaled, centers = 2)
set.seed(123)
km_2020_4 <- kmeans(data2020_scaled, centers = 4)
km_2022_4 <- kmeans(data2022_scaled, centers = 4)
set.seed(123)
km_2020_6 <- kmeans(data2020_scaled, centers = 6)
km_2022_6 <- kmeans(data2022_scaled, centers = 6)
# Fuzzy C-Means with 2, 4 and 6 centers.
# NOTE(review): there is no set.seed() before these calls; if fcm() draws
# random initial values they depend on the RNG state left by km_2022_6 and on
# the order of the calls below — confirm before reordering anything here.
fcm_2020_2 <- fcm(data2020_scaled, centers = 2)
fcm_2022_2 <- fcm(data2022_scaled, centers = 2)
fcm_2020_4 <- fcm(data2020_scaled, centers = 4)
fcm_2022_4 <- fcm(data2022_scaled, centers = 4)
fcm_2020_6 <- fcm(data2020_scaled, centers = 6)
fcm_2022_6 <- fcm(data2022_scaled, centers = 6)
# DBSCAN with a fixed eps = 3.5. Here the _2/_4/_6 suffix is the minPts value,
# not a number of clusters as in the K-Means and FCM objects above.
db_2020_2 <- dbscan(data2020_scaled, eps = 3.5, minPts = 2)
db_2022_2 <- dbscan(data2022_scaled, eps = 3.5, minPts = 2)
db_2020_4 <- dbscan(data2020_scaled, eps = 3.5, minPts = 4)
db_2022_4 <- dbscan(data2022_scaled, eps = 3.5, minPts = 4)
db_2020_6 <- dbscan(data2020_scaled, eps = 3.5, minPts = 6)
db_2022_6 <- dbscan(data2022_scaled, eps = 3.5, minPts = 6)
# PCA on the already-scaled data; used only to project the clusters for plots.
pca_2020 <- prcomp(data2020_scaled)
pca_2022 <- prcomp(data2022_scaled)
# PCA biplots coloured by cluster assignment, one per method / year / setting.
# Each title now states k (K-Means, FCM) or minPts (DBSCAN); the original
# titles were identical across settings, so the plots could not be told apart.
# K-Means, k = 2, 4, 6
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(km_2020_2$cluster), addEllipses = TRUE, title = "PCA KMeans 2020 (k = 2)")
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(km_2022_2$cluster), addEllipses = TRUE, title = "PCA KMeans 2022 (k = 2)")
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(km_2020_4$cluster), addEllipses = TRUE, title = "PCA KMeans 2020 (k = 4)")
## Too few points to calculate an ellipse
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(km_2022_4$cluster), addEllipses = TRUE, title = "PCA KMeans 2022 (k = 4)")
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(km_2020_6$cluster), addEllipses = TRUE, title = "PCA KMeans 2020 (k = 6)")
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(km_2022_6$cluster), addEllipses = TRUE, title = "PCA KMeans 2022 (k = 6)")
## Too few points to calculate an ellipse
# Fuzzy C-Means, k = 2, 4, 6
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(fcm_2020_2$cluster), addEllipses = TRUE, title = "PCA FCM 2020 (k = 2)")
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(fcm_2022_2$cluster), addEllipses = TRUE, title = "PCA FCM 2022 (k = 2)")
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(fcm_2020_4$cluster), addEllipses = TRUE, title = "PCA FCM 2020 (k = 4)")
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(fcm_2022_4$cluster), addEllipses = TRUE, title = "PCA FCM 2022 (k = 4)")
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(fcm_2020_6$cluster), addEllipses = TRUE, title = "PCA FCM 2020 (k = 6)")
## Too few points to calculate an ellipse
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(fcm_2022_6$cluster), addEllipses = TRUE, title = "PCA FCM 2022 (k = 6)")
# DBSCAN, eps = 3.5, minPts = 2, 4, 6
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(db_2020_2$cluster), addEllipses = TRUE, title = "PCA DBSCAN 2020 (minPts = 2)")
## Too few points to calculate an ellipse
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(db_2022_2$cluster), addEllipses = TRUE, title = "PCA DBSCAN 2022 (minPts = 2)")
## Too few points to calculate an ellipse
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(db_2020_4$cluster), addEllipses = TRUE, title = "PCA DBSCAN 2020 (minPts = 4)")
## Too few points to calculate an ellipse
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(db_2022_4$cluster), addEllipses = TRUE, title = "PCA DBSCAN 2022 (minPts = 4)")
## Too few points to calculate an ellipse
fviz_pca_biplot(pca_2020, label = "var", habillage = as.factor(db_2020_6$cluster), addEllipses = TRUE, title = "PCA DBSCAN 2020 (minPts = 6)")
fviz_pca_biplot(pca_2022, label = "var", habillage = as.factor(db_2022_6$cluster), addEllipses = TRUE, title = "PCA DBSCAN 2022 (minPts = 6)")
# Silhouette plots for the K-Means and Fuzzy C-Means partitions.
# The Euclidean distance matrix of each year is computed once and reused
# (the original recomputed dist() for each of the twelve plots), and each title
# states k so the plots for different settings can be told apart.
dist_2020 <- dist(data2020_scaled)
dist_2022 <- dist(data2022_scaled)
plot(silhouette(km_2020_2$cluster, dist_2020), main = "Silhouette KMeans 2020 (k = 2)")
plot(silhouette(km_2022_2$cluster, dist_2022), main = "Silhouette KMeans 2022 (k = 2)")
plot(silhouette(km_2020_4$cluster, dist_2020), main = "Silhouette KMeans 2020 (k = 4)")
plot(silhouette(km_2022_4$cluster, dist_2022), main = "Silhouette KMeans 2022 (k = 4)")
plot(silhouette(km_2020_6$cluster, dist_2020), main = "Silhouette KMeans 2020 (k = 6)")
plot(silhouette(km_2022_6$cluster, dist_2022), main = "Silhouette KMeans 2022 (k = 6)")
plot(silhouette(fcm_2020_2$cluster, dist_2020), main = "Silhouette FCM 2020 (k = 2)")
plot(silhouette(fcm_2022_2$cluster, dist_2022), main = "Silhouette FCM 2022 (k = 2)")
plot(silhouette(fcm_2020_4$cluster, dist_2020), main = "Silhouette FCM 2020 (k = 4)")
plot(silhouette(fcm_2022_4$cluster, dist_2022), main = "Silhouette FCM 2022 (k = 4)")
plot(silhouette(fcm_2020_6$cluster, dist_2020), main = "Silhouette FCM 2020 (k = 6)")
plot(silhouette(fcm_2022_6$cluster, dist_2022), main = "Silhouette FCM 2022 (k = 6)")
# Silhouette plots for the DBSCAN fits.
#
# dbscan() assigns label 0 to noise points. The original code counted that
# label as a cluster, both in the "more than one cluster" guard and inside the
# silhouette itself, so a fit with one real cluster plus noise was scored as a
# two-cluster partition. Noise points are now dropped first and a silhouette is
# drawn only when at least two real clusters remain.
#
# db:     a fitted dbscan object (its $cluster vector is used).
# scaled: the matrix the model was fitted on, rows in the same order.
# label:  text used in the plot title and in the skip message.
plot_dbscan_silhouette <- function(db, scaled, label) {
  clustered <- db$cluster != 0
  n_clusters <- length(unique(db$cluster[clustered]))
  if (n_clusters > 1) {
    sil <- silhouette(
      db$cluster[clustered],
      dist(scaled[clustered, , drop = FALSE])
    )
    plot(sil, main = paste("Silhouette", label))
  } else {
    cat(
      label,
      ": kurang dari 2 cluster (selain noise), silhouette tidak dihitung.\n",
      sep = ""
    )
  }
  invisible(NULL)
}

plot_dbscan_silhouette(db_2020_2, data2020_scaled, "DBSCAN 2020 (minPts = 2)")
plot_dbscan_silhouette(db_2022_2, data2022_scaled, "DBSCAN 2022 (minPts = 2)")
plot_dbscan_silhouette(db_2020_4, data2020_scaled, "DBSCAN 2020 (minPts = 4)")
plot_dbscan_silhouette(db_2022_4, data2022_scaled, "DBSCAN 2022 (minPts = 4)")
plot_dbscan_silhouette(db_2020_6, data2020_scaled, "DBSCAN 2020 (minPts = 6)")
plot_dbscan_silhouette(db_2022_6, data2022_scaled, "DBSCAN 2022 (minPts = 6)")
table(KMeans2020 = km_2020_2$cluster)
## KMeans2020
## 1 2
## 17 17
table(KMeans2022 = km_2022_2$cluster)
## KMeans2022
## 1 2
## 8 26
# Pair each province's k = 2 K-Means label from 2020 with its label from 2022.
# merge() joins on Provinsi, so the printed table is ordered by province name.
# NOTE: the label numbers are not aligned across years. In the cluster means
# printed further down, the higher-IPM group is C2 in 2020 but C1 in 2022, so
# equal numbers in the two columns do not mean "same kind of cluster".
# hasil_2020_2, hasil_2022_2 and gabung are reassigned later for other models.
hasil_2020_2 <- data.frame(Provinsi = data2020$Provinsi, Cluster = km_2020_2$cluster)
hasil_2022_2 <- data.frame(Provinsi = data2022$Provinsi, Cluster = km_2022_2$cluster)
gabung <- merge(hasil_2020_2, hasil_2022_2, by = "Provinsi", suffixes = c("_2020", "_2022"))
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 2 2
## 2 Bali 2 1
## 3 Banten 2 1
## 4 Bengkulu 1 2
## 5 DI Yogyakarta 2 1
## 6 DKI Jakarta 2 1
## 7 Gorontalo 1 2
## 8 Jambi 2 2
## 9 Jawa Barat 2 1
## 10 Jawa Tengah 1 2
## 11 Jawa Timur 1 2
## 12 Kalimantan Barat 1 2
## 13 Kalimantan Selatan 2 2
## 14 Kalimantan Tengah 2 2
## 15 Kalimantan Timur 2 1
## 16 Kalimantan Utara 2 2
## 17 Kepulauan Bangka Belitung 2 2
## 18 Kepulauan Riau 2 1
## 19 Lampung 1 2
## 20 Maluku 1 2
## 21 Maluku Utara 1 2
## 22 Nusa Tenggara Barat 1 2
## 23 Nusa Tenggara Timur 1 2
## 24 Papua 1 2
## 25 Papua Barat 1 2
## 26 Riau 2 2
## 27 Sulawesi Barat 1 2
## 28 Sulawesi Selatan 1 2
## 29 Sulawesi Tengah 1 2
## 30 Sulawesi Tenggara 1 2
## 31 Sulawesi Utara 2 1
## 32 Sumatera Barat 2 2
## 33 Sumatera Selatan 1 2
## 34 Sumatera Utara 2 2
# Attach the k = 2 K-Means labels to the original indicator values (not the
# clipped/scaled ones) and compute the mean of every indicator per cluster.
# summarise(across(...)) replaces the superseded summarise_all(); the result
# has the same columns and values.
data2020_km_2 <- cbind(data2020[, -1], Cluster = km_2020_2$cluster)
data2022_km_2 <- cbind(data2022[, -1], Cluster = km_2022_2$cluster)
summary2020_km_2 <- data2020_km_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
summary2022_km_2 <- data2022_km_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_km_2)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2534854 | 94.45706 | 8.675882 | 68.64765 | 13.854706 | 0.3564706 | 1070797 | 57.50118 | 2019.751 | 68.79353 | 7.964118 |
| C2 | 2818218 | 98.24941 | 9.488824 | 73.51412 | 6.998824 | 0.3417647 | 1434359 | 64.43882 | 2137.180 | 71.28824 | 10.267059 |
knitr::kable(summary2022_km_2)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 3589878 | 98.10250 | 9.791250 | 76.61 | 13.66375 | 0.7577500 | 1671399 | 65.10469 | 2099.495 | 72.76375 | 6.843750 |
| C2 | 2656221 | 96.25115 | 8.546154 | 70.54 | 22.65885 | 0.6654231 | 1235528 | 60.28023 | 2029.122 | 69.69615 | 4.593462 |
# Cluster-profile heatmaps for K-Means, k = 2.
# scale = "column" standardizes each indicator across clusters before colouring;
# on the raw means the rupiah-valued columns (around 1e6) set the whole colour
# range and every other indicator is drawn in a single colour.
# labels_row shows the cluster names, which as.matrix() would otherwise drop.
pheatmap(as.matrix(summary2020_km_2[, -1]), scale = "column",
         labels_row = summary2020_km_2$Cluster,
         main = "Profil Cluster KMeans - 2020 (k = 2)", cluster_rows = FALSE)
pheatmap(as.matrix(summary2022_km_2[, -1]), scale = "column",
         labels_row = summary2022_km_2$Cluster,
         main = "Profil Cluster KMeans - 2022 (k = 2)", cluster_rows = FALSE)
table(KMeans2020 = km_2020_4$cluster)
## KMeans2020
## 1 2 3 4
## 3 18 5 8
table(KMeans2022 = km_2022_4$cluster)
## KMeans2022
## 1 2 3 4
## 5 7 16 6
hasil_2020_4 <- data.frame(Provinsi = data2020$Provinsi, Cluster = km_2020_4$cluster)
hasil_2022_4 <- data.frame(Provinsi = data2022$Provinsi, Cluster = km_2022_4$cluster)
gabung <- merge(hasil_2020_4, hasil_2022_4, by = "Provinsi", suffixes = c("_2020", "_2022"))
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 2 3
## 2 Bali 4 2
## 3 Banten 4 4
## 4 Bengkulu 2 3
## 5 DI Yogyakarta 4 2
## 6 DKI Jakarta 4 4
## 7 Gorontalo 3 1
## 8 Jambi 2 3
## 9 Jawa Barat 4 4
## 10 Jawa Tengah 2 2
## 11 Jawa Timur 2 2
## 12 Kalimantan Barat 2 3
## 13 Kalimantan Selatan 2 3
## 14 Kalimantan Tengah 2 3
## 15 Kalimantan Timur 4 4
## 16 Kalimantan Utara 2 3
## 17 Kepulauan Bangka Belitung 2 3
## 18 Kepulauan Riau 4 4
## 19 Lampung 2 3
## 20 Maluku 1 3
## 21 Maluku Utara 1 3
## 22 Nusa Tenggara Barat 3 2
## 23 Nusa Tenggara Timur 3 1
## 24 Papua 3 1
## 25 Papua Barat 1 1
## 26 Riau 2 3
## 27 Sulawesi Barat 3 1
## 28 Sulawesi Selatan 2 2
## 29 Sulawesi Tengah 2 3
## 30 Sulawesi Tenggara 2 2
## 31 Sulawesi Utara 4 4
## 32 Sumatera Barat 2 3
## 33 Sumatera Selatan 2 3
## 34 Sumatera Utara 2 3
# Attach the k = 4 K-Means labels to the original indicator values (not the
# clipped/scaled ones) and compute the mean of every indicator per cluster.
# summarise(across(...)) replaces the superseded summarise_all(); the result
# has the same columns and values.
data2020_km_4 <- cbind(data2020[, -1], Cluster = km_2020_4$cluster)
data2022_km_4 <- cbind(data2022[, -1], Cluster = km_2022_4$cluster)
summary2020_km_4 <- data2020_km_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
summary2022_km_4 <- data2022_km_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_km_4)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2836905 | 98.57000 | 9.873333 | 67.69000 | 15.196667 | 0.3366667 | 1190586 | 51.63333 | 1816.087 | 66.77667 | 11.770000 |
| C2 | 2635704 | 96.98444 | 8.938889 | 71.08167 | 9.293889 | 0.3316667 | 1172157 | 60.57889 | 2078.936 | 70.53389 | 8.521667 |
| C3 | 2560943 | 90.14600 | 7.944000 | 65.73400 | 17.520000 | 0.3780000 | 1031176 | 58.31800 | 2084.818 | 66.48800 | 6.112000 |
| C4 | 2780515 | 97.98125 | 9.820000 | 75.69250 | 6.753750 | 0.3750000 | 1595149 | 67.00875 | 2171.830 | 72.37625 | 11.333750 |
knitr::kable(summary2022_km_4)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2918778 | 93.14000 | 7.732000 | 65.98200 | 38.20000 | 0.7602000 | 1177166 | 55.04995 | 1959.405 | 66.86000 | 3.808000 |
| C2 | 2471170 | 93.76143 | 8.655714 | 73.87571 | 20.27714 | 0.7630000 | 1232112 | 63.30735 | 2102.354 | 71.91429 | 4.665714 |
| C3 | 2638739 | 98.19875 | 8.880625 | 71.50500 | 18.00750 | 0.6113750 | 1283506 | 60.80643 | 2025.864 | 70.13000 | 4.768750 |
| C4 | 3944812 | 99.02333 | 9.865000 | 75.96667 | 12.89667 | 0.7398333 | 1741367 | 66.13659 | 2104.301 | 72.40500 | 7.696667 |
# Cluster-profile heatmaps for K-Means, k = 4.
# scale = "column" standardizes each indicator across clusters before colouring;
# on the raw means the rupiah-valued columns (around 1e6) set the whole colour
# range and every other indicator is drawn in a single colour.
# labels_row shows the cluster names, which as.matrix() would otherwise drop.
pheatmap(as.matrix(summary2020_km_4[, -1]), scale = "column",
         labels_row = summary2020_km_4$Cluster,
         main = "Profil Cluster KMeans - 2020 (k = 4)", cluster_rows = FALSE)
pheatmap(as.matrix(summary2022_km_4[, -1]), scale = "column",
         labels_row = summary2022_km_4$Cluster,
         main = "Profil Cluster KMeans - 2022 (k = 4)", cluster_rows = FALSE)
table(KMeans2020 = km_2020_6$cluster)
## KMeans2020
## 1 2 3 4 5 6
## 3 9 4 2 10 6
table(KMeans2022 = km_2022_6$cluster)
## KMeans2022
## 1 2 3 4 5 6
## 6 6 8 2 8 4
hasil_2020_6 <- data.frame(Provinsi = data2020$Provinsi, Cluster = km_2020_6$cluster)
hasil_2022_6 <- data.frame(Provinsi = data2022$Provinsi, Cluster = km_2022_6$cluster)
gabung <- merge(hasil_2020_6, hasil_2022_6, by = "Provinsi", suffixes = c("_2020", "_2022"))
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 5 3
## 2 Bali 4 1
## 3 Banten 6 2
## 4 Bengkulu 2 3
## 5 DI Yogyakarta 4 1
## 6 DKI Jakarta 6 2
## 7 Gorontalo 2 6
## 8 Jambi 5 5
## 9 Jawa Barat 6 2
## 10 Jawa Tengah 2 1
## 11 Jawa Timur 2 1
## 12 Kalimantan Barat 2 3
## 13 Kalimantan Selatan 5 5
## 14 Kalimantan Tengah 5 5
## 15 Kalimantan Timur 6 2
## 16 Kalimantan Utara 5 3
## 17 Kepulauan Bangka Belitung 5 5
## 18 Kepulauan Riau 6 2
## 19 Lampung 2 3
## 20 Maluku 1 3
## 21 Maluku Utara 1 3
## 22 Nusa Tenggara Barat 3 4
## 23 Nusa Tenggara Timur 3 6
## 24 Papua 3 6
## 25 Papua Barat 1 6
## 26 Riau 5 5
## 27 Sulawesi Barat 3 4
## 28 Sulawesi Selatan 2 1
## 29 Sulawesi Tengah 2 3
## 30 Sulawesi Tenggara 2 1
## 31 Sulawesi Utara 6 2
## 32 Sumatera Barat 5 5
## 33 Sumatera Selatan 5 5
## 34 Sumatera Utara 5 5
# Attach the k = 6 K-Means labels to the original indicator values (not the
# clipped/scaled ones) and compute the mean of every indicator per cluster.
# summarise(across(...)) replaces the superseded summarise_all(); the result
# has the same columns and values.
data2020_km_6 <- cbind(data2020[, -1], Cluster = km_2020_6$cluster)
data2022_km_6 <- cbind(data2022[, -1], Cluster = km_2022_6$cluster)
summary2020_km_6 <- data2020_km_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
summary2022_km_6 <- data2022_km_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_km_6)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2836905 | 98.57000 | 9.873333 | 67.69000 | 15.196667 | 0.3366667 | 1190586 | 51.63333 | 1816.087 | 66.77667 | 11.770000 |
| C2 | 2368987 | 95.48111 | 8.636667 | 70.43778 | 11.655556 | 0.3588889 | 1058017 | 58.88889 | 2042.296 | 70.54556 | 7.553333 |
| C3 | 2554453 | 87.99500 | 7.865000 | 64.99750 | 18.095000 | 0.3700000 | 1021900 | 58.19250 | 2094.770 | 66.09250 | 5.887500 |
| C4 | 2129567 | 94.94500 | 9.630000 | 77.73500 | 8.030000 | 0.4000000 | 1460819 | 67.78000 | 2219.840 | 73.56000 | 4.430000 |
| C5 | 2870869 | 98.51400 | 9.143000 | 71.42100 | 7.761000 | 0.3150000 | 1264495 | 61.92400 | 2108.519 | 70.27700 | 9.242000 |
| C6 | 2997498 | 98.99333 | 9.883333 | 75.01167 | 6.328333 | 0.3666667 | 1639925 | 66.75167 | 2155.827 | 71.98167 | 13.635000 |
knitr::kable(summary2022_km_6)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2547997 | 94.56000 | 8.83000 | 74.61167 | 19.07333 | 0.7656667 | 1244020 | 61.38878 | 2042.673 | 72.72167 | 4.790000 |
| C2 | 3944812 | 99.02333 | 9.86500 | 75.96667 | 12.89667 | 0.7398333 | 1741367 | 66.13659 | 2104.301 | 72.40500 | 7.696667 |
| C3 | 2628997 | 97.65000 | 8.96375 | 70.73000 | 22.24500 | 0.6042500 | 1204563 | 57.85537 | 1942.029 | 69.83125 | 4.780000 |
| C4 | 2087289 | 91.39500 | 7.84500 | 68.19000 | 25.58500 | 0.7400000 | 1058284 | 68.10980 | 2271.715 | 66.35000 | 3.515000 |
| C5 | 2648481 | 98.74750 | 8.79750 | 72.28000 | 13.77000 | 0.6185000 | 1362450 | 63.75748 | 2109.698 | 70.42875 | 4.757500 |
| C6 | 3107381 | 92.97000 | 7.64500 | 65.74750 | 41.83250 | 0.7670000 | 1232481 | 53.46222 | 1928.508 | 67.16750 | 3.982500 |
# Cluster-profile heatmaps for K-Means, k = 6.
# scale = "column" standardizes each indicator across clusters before colouring;
# on the raw means the rupiah-valued columns (around 1e6) set the whole colour
# range and every other indicator is drawn in a single colour.
# labels_row shows the cluster names, which as.matrix() would otherwise drop.
pheatmap(as.matrix(summary2020_km_6[, -1]), scale = "column",
         labels_row = summary2020_km_6$Cluster,
         main = "Profil Cluster KMeans - 2020 (k = 6)", cluster_rows = FALSE)
pheatmap(as.matrix(summary2022_km_6[, -1]), scale = "column",
         labels_row = summary2022_km_6$Cluster,
         main = "Profil Cluster KMeans - 2022 (k = 6)", cluster_rows = FALSE)
table(FuzzyCMeans2020 = fcm_2020_2$cluster)
## FuzzyCMeans2020
## 1 2
## 17 17
table(FuzzyCMeans2022 = fcm_2022_2$cluster)
## FuzzyCMeans2022
## 1 2
## 16 18
hasil_2020_2 <- data.frame(Provinsi = data2020$Provinsi, Cluster = fcm_2020_2$cluster)
hasil_2022_2 <- data.frame(Provinsi = data2022$Provinsi, Cluster = fcm_2022_2$cluster)
gabung <- merge(hasil_2020_2, hasil_2022_2, by = "Provinsi", suffixes = c("_2020", "_2022"))
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 2 2
## 2 Bali 2 1
## 3 Banten 2 1
## 4 Bengkulu 1 2
## 5 DI Yogyakarta 2 1
## 6 DKI Jakarta 2 1
## 7 Gorontalo 1 2
## 8 Jambi 2 2
## 9 Jawa Barat 2 1
## 10 Jawa Tengah 1 2
## 11 Jawa Timur 1 2
## 12 Kalimantan Barat 1 2
## 13 Kalimantan Selatan 2 1
## 14 Kalimantan Tengah 2 1
## 15 Kalimantan Timur 2 1
## 16 Kalimantan Utara 2 1
## 17 Kepulauan Bangka Belitung 2 1
## 18 Kepulauan Riau 2 1
## 19 Lampung 1 2
## 20 Maluku 1 2
## 21 Maluku Utara 1 2
## 22 Nusa Tenggara Barat 1 2
## 23 Nusa Tenggara Timur 1 2
## 24 Papua 1 2
## 25 Papua Barat 1 2
## 26 Riau 2 1
## 27 Sulawesi Barat 1 2
## 28 Sulawesi Selatan 1 1
## 29 Sulawesi Tengah 1 2
## 30 Sulawesi Tenggara 1 2
## 31 Sulawesi Utara 2 1
## 32 Sumatera Barat 2 1
## 33 Sumatera Selatan 1 2
## 34 Sumatera Utara 2 1
# Attach the 2-center Fuzzy C-Means labels to the original indicator values
# (not the clipped/scaled ones) and compute the mean of every indicator per
# cluster. summarise(across(...)) replaces the superseded summarise_all(); the
# result has the same columns and values.
data2020_fcm_2 <- cbind(data2020[, -1], Cluster = fcm_2020_2$cluster)
data2022_fcm_2 <- cbind(data2022[, -1], Cluster = fcm_2022_2$cluster)
summary2020_fcm_2 <- data2020_fcm_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
summary2022_fcm_2 <- data2022_fcm_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_fcm_2)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2534854 | 94.45706 | 8.675882 | 68.64765 | 13.854706 | 0.3564706 | 1070797 | 57.50118 | 2019.751 | 68.79353 | 7.964118 |
| C2 | 2818218 | 98.24941 | 9.488824 | 73.51412 | 6.998824 | 0.3417647 | 1434359 | 64.43882 | 2137.180 | 71.28824 | 10.267059 |
knitr::kable(summary2022_fcm_2)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 3199865 | 98.06687 | 9.347500 | 74.54563 | 13.20125 | 0.685625 | 1535808 | 64.30794 | 2084.838 | 71.70562 | 5.858750 |
| C2 | 2587941 | 95.46000 | 8.387222 | 69.67722 | 27.06778 | 0.688500 | 1162332 | 58.84425 | 2010.873 | 69.27333 | 4.468889 |
# Cluster-profile heatmaps for Fuzzy C-Means, k = 2.
# scale = "column" standardizes each indicator across clusters before colouring;
# on the raw means the rupiah-valued columns (around 1e6) set the whole colour
# range and every other indicator is drawn in a single colour.
# labels_row shows the cluster names, which as.matrix() would otherwise drop.
pheatmap(as.matrix(summary2020_fcm_2[, -1]), scale = "column",
         labels_row = summary2020_fcm_2$Cluster,
         main = "Profil Cluster FCM - 2020 (k = 2)", cluster_rows = FALSE)
pheatmap(as.matrix(summary2022_fcm_2[, -1]), scale = "column",
         labels_row = summary2022_fcm_2$Cluster,
         main = "Profil Cluster FCM - 2022 (k = 2)", cluster_rows = FALSE)
table(FuzzyCMeans2020 = fcm_2020_4$cluster)
## FuzzyCMeans2020
## 1 2 3 4
## 9 7 8 10
table(FuzzyCMeans2022 = fcm_2022_4$cluster)
## FuzzyCMeans2022
## 1 2 3 4
## 9 8 6 11
hasil_2020_4 <- data.frame(Provinsi = data2020$Provinsi, Cluster = fcm_2020_4$cluster)
hasil_2022_4 <- data.frame(Provinsi = data2022$Provinsi, Cluster = fcm_2022_4$cluster)
gabung <- merge(hasil_2020_4, hasil_2022_4, by = "Provinsi", suffixes = c("_2020", "_2022"))
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 1 4
## 2 Bali 3 4
## 3 Banten 3 3
## 4 Bengkulu 2 1
## 5 DI Yogyakarta 3 1
## 6 DKI Jakarta 3 3
## 7 Gorontalo 4 2
## 8 Jambi 1 4
## 9 Jawa Barat 3 3
## 10 Jawa Tengah 2 1
## 11 Jawa Timur 2 1
## 12 Kalimantan Barat 4 1
## 13 Kalimantan Selatan 1 4
## 14 Kalimantan Tengah 1 4
## 15 Kalimantan Timur 3 3
## 16 Kalimantan Utara 1 4
## 17 Kepulauan Bangka Belitung 1 4
## 18 Kepulauan Riau 3 3
## 19 Lampung 4 1
## 20 Maluku 4 2
## 21 Maluku Utara 4 1
## 22 Nusa Tenggara Barat 2 2
## 23 Nusa Tenggara Timur 4 2
## 24 Papua 4 2
## 25 Papua Barat 4 2
## 26 Riau 1 4
## 27 Sulawesi Barat 4 2
## 28 Sulawesi Selatan 2 1
## 29 Sulawesi Tengah 4 2
## 30 Sulawesi Tenggara 2 1
## 31 Sulawesi Utara 3 3
## 32 Sumatera Barat 1 4
## 33 Sumatera Selatan 2 4
## 34 Sumatera Utara 1 4
# Attach the 4-center Fuzzy C-Means labels to the original indicator values
# (not the clipped/scaled ones) and compute the mean of every indicator per
# cluster. summarise(across(...)) replaces the superseded summarise_all(); the
# result has the same columns and values.
data2020_fcm_4 <- cbind(data2020[, -1], Cluster = fcm_2020_4$cluster)
data2022_fcm_4 <- cbind(data2022[, -1], Cluster = fcm_2022_4$cluster)
summary2020_fcm_4 <- data2020_fcm_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
summary2022_fcm_4 <- data2022_fcm_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
knitr::kable(summary2020_fcm_4)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2851731 | 98.48778 | 9.194444 | 71.57778 | 7.216667 | 0.3122222 | 1291435 | 62.15444 | 2106.380 | 70.32111 | 9.318889 |
| C2 | 2403749 | 94.00571 | 8.675714 | 70.94571 | 11.982857 | 0.3657143 | 1061486 | 62.24143 | 2133.286 | 70.45714 | 7.570000 |
| C3 | 2780515 | 97.98125 | 9.820000 | 75.69250 | 6.753750 | 0.3750000 | 1595149 | 67.00875 | 2171.830 | 72.37625 | 11.333750 |
| C4 | 2626628 | 94.77300 | 8.676000 | 67.03900 | 15.165000 | 0.3500000 | 1077314 | 54.18300 | 1940.277 | 67.62900 | 8.240000 |
# Render the 2022 FCM (_4) cluster means as a markdown table.
summary2022_fcm_4 %>%
  knitr::kable()
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2527640 | 95.51333 | 8.612222 | 72.43778 | 20.38333 | 0.7043333 | 1183799 | 59.76360 | 1987.393 | 71.58000 | 4.604444 |
| C2 | 2743538 | 94.01500 | 8.168750 | 67.48375 | 34.41625 | 0.7210000 | 1162500 | 57.63758 | 2011.908 | 67.09375 | 4.133750 |
| C3 | 3944812 | 99.02333 | 9.865000 | 75.96667 | 12.89667 | 0.7398333 | 1741367 | 66.13659 | 2104.301 | 72.40500 | 7.696667 |
| C4 | 2674076 | 98.31545 | 8.952727 | 72.66455 | 14.75273 | 0.6197273 | 1372048 | 62.93918 | 2085.956 | 70.80091 | 4.862727 |
# Heatmaps of the per-cluster mean profiles (rows = clusters, columns =
# indicators) for the FCM (_4) fits.
# Row names carry the cluster key so the heatmap rows are labelled; without
# them the matrix built from the summary table has no row labels at all.
mat2020_fcm_4 <- as.matrix(summary2020_fcm_4[, -1])
rownames(mat2020_fcm_4) <- summary2020_fcm_4$Cluster
mat2022_fcm_4 <- as.matrix(summary2022_fcm_4[, -1])
rownames(mat2022_fcm_4) <- summary2022_fcm_4$Cluster

# scale = "column" standardises each indicator across clusters. The raw
# columns range from a ratio below 1 to amounts in the millions, so an
# unscaled colour map would only reflect the largest-valued columns.
pheatmap(
  mat2020_fcm_4,
  scale = "column",
  main = "Profil Cluster FCM - 2020",
  cluster_rows = FALSE
)
pheatmap(
  mat2022_fcm_4,
  scale = "column",
  main = "Profil Cluster FCM - 2022",
  cluster_rows = FALSE
)
# Cluster sizes for the FCM (_6) fits. Per the printed counts below, label 3
# does not occur in the 2020 hard assignment (only five labels are in use).
table(fcm_2020_6$cluster, dnn = "FuzzyCMeans2020")
## FuzzyCMeans2020
## 1 2 4 5 6
## 9 3 7 5 10
table(fcm_2022_6$cluster, dnn = "FuzzyCMeans2022")
## FuzzyCMeans2022
## 1 2 3 4 5 6
## 7 5 5 5 6 6
# Side-by-side membership per province; the labels come from two separate
# fits, so equal numbers across the year columns need not mean the same group.
hasil_2020_6 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = fcm_2020_6$cluster
)
hasil_2022_6 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = fcm_2022_6$cluster
)
gabung <- merge(
  x = hasil_2020_6,
  y = hasil_2022_6,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 1 6
## 2 Bali 1 3
## 3 Banten 5 2
## 4 Bengkulu 4 6
## 5 DI Yogyakarta 5 1
## 6 DKI Jakarta 5 2
## 7 Gorontalo 6 4
## 8 Jambi 1 3
## 9 Jawa Barat 2 2
## 10 Jawa Tengah 4 1
## 11 Jawa Timur 4 1
## 12 Kalimantan Barat 6 1
## 13 Kalimantan Selatan 1 5
## 14 Kalimantan Tengah 1 5
## 15 Kalimantan Timur 5 2
## 16 Kalimantan Utara 1 3
## 17 Kepulauan Bangka Belitung 1 5
## 18 Kepulauan Riau 5 2
## 19 Lampung 6 6
## 20 Maluku 6 6
## 21 Maluku Utara 6 6
## 22 Nusa Tenggara Barat 4 1
## 23 Nusa Tenggara Timur 6 4
## 24 Papua 6 4
## 25 Papua Barat 6 4
## 26 Riau 2 3
## 27 Sulawesi Barat 6 4
## 28 Sulawesi Selatan 4 1
## 29 Sulawesi Tengah 6 6
## 30 Sulawesi Tenggara 4 1
## 31 Sulawesi Utara 2 5
## 32 Sumatera Barat 1 3
## 33 Sumatera Selatan 4 5
## 34 Sumatera Utara 1 5
# Per-cluster mean profile of every indicator for the FCM (_6) fits.
# The first column of each yearly table is dropped by position before the
# hard cluster label is attached, so only the indicator columns are averaged.

# Labels are attached by row position; fail loudly if the lengths disagree
# instead of letting cbind() recycle or misalign them.
stopifnot(
  length(fcm_2020_6$cluster) == nrow(data2020),
  length(fcm_2022_6$cluster) == nrow(data2022)
)

data2020_fcm_6 <- cbind(data2020[, -1], Cluster = fcm_2020_6$cluster)
data2022_fcm_6 <- cbind(data2022[, -1], Cluster = fcm_2022_6$cluster)

# across() replaces the superseded summarise_all(); labels with no members
# simply produce no row in the summary.
summary2020_fcm_6 <- data2020_fcm_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_fcm_6 <- data2022_fcm_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

knitr::kable(summary2020_fcm_6)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2807838 | 97.99556 | 9.176667 | 71.88778 | 6.878889 | 0.3166667 | 1310238 | 62.95444 | 2130.487 | 70.38000 | 8.393333 |
| C2 | 2886751 | 99.21667 | 9.390000 | 72.57667 | 7.440000 | 0.3666667 | 1292415 | 62.84333 | 2134.020 | 72.11000 | 12.706667 |
| C4 | 2403749 | 94.00571 | 8.675714 | 70.94571 | 11.982857 | 0.3657143 | 1061486 | 62.24143 | 2133.286 | 70.45714 | 7.570000 |
| C5 | 2795781 | 98.12600 | 10.110000 | 77.00400 | 6.950000 | 0.3720000 | 1742945 | 68.06800 | 2151.124 | 72.43000 | 12.176000 |
| C6 | 2626628 | 94.77300 | 8.676000 | 67.03900 | 15.165000 | 0.3500000 | 1077314 | 54.18300 | 1940.277 | 67.62900 | 8.240000 |
# Render the 2022 FCM (_6) cluster means as a markdown table.
summary2022_fcm_6 %>%
  knitr::kable()
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C1 | 2464809 | 93.53714 | 8.398571 | 72.76000 | 20.91143 | 0.7487143 | 1202113 | 62.85260 | 2066.146 | 71.68857 | 4.668571 |
| C2 | 4082839 | 98.86600 | 9.902000 | 76.39800 | 12.55200 | 0.7430000 | 1842300 | 65.75160 | 2094.250 | 72.47000 | 7.934000 |
| C3 | 2705749 | 97.96200 | 9.148000 | 73.43800 | 12.72600 | 0.6326000 | 1408558 | 60.14844 | 2040.923 | 71.71000 | 4.946000 |
| C4 | 2918778 | 93.14000 | 7.732000 | 65.98200 | 38.20000 | 0.7602000 | 1177166 | 55.04995 | 1959.405 | 66.86000 | 3.808000 |
| C5 | 2819309 | 98.87000 | 8.830000 | 72.18833 | 13.98000 | 0.6293333 | 1351052 | 66.41396 | 2138.527 | 70.36000 | 4.883333 |
| C6 | 2512405 | 98.25500 | 9.141667 | 70.89667 | 25.13167 | 0.6111667 | 1138950 | 57.48695 | 1964.342 | 69.17167 | 4.793333 |
# Heatmaps of the per-cluster mean profiles (rows = clusters, columns =
# indicators) for the FCM (_6) fits.
# Row names carry the cluster key so the heatmap rows are labelled; this
# matters here because the 2020 summary skips a label (C3 has no row).
mat2020_fcm_6 <- as.matrix(summary2020_fcm_6[, -1])
rownames(mat2020_fcm_6) <- summary2020_fcm_6$Cluster
mat2022_fcm_6 <- as.matrix(summary2022_fcm_6[, -1])
rownames(mat2022_fcm_6) <- summary2022_fcm_6$Cluster

# scale = "column" standardises each indicator across clusters. The raw
# columns range from a ratio below 1 to amounts in the millions, so an
# unscaled colour map would only reflect the largest-valued columns.
pheatmap(
  mat2020_fcm_6,
  scale = "column",
  main = "Profil Cluster FCM - 2020",
  cluster_rows = FALSE
)
pheatmap(
  mat2022_fcm_6,
  scale = "column",
  main = "Profil Cluster FCM - 2022",
  cluster_rows = FALSE
)
# Label counts for the DBSCAN (_2) results.
# NOTE(review): label 0 is presumably the DBSCAN noise label (as in
# dbscan::dbscan()) rather than a cluster -- confirm where db_*_2 is fitted.
table(db_2020_2$cluster, dnn = "DBSCAN2020")
## DBSCAN2020
## 0 1
## 3 31
table(db_2022_2$cluster, dnn = "DBSCAN2022")
## DBSCAN2022
## 0 1
## 2 32
# Side-by-side label per province for both years, joined on the province name.
hasil_2020_2 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = db_2020_2$cluster
)
hasil_2022_2 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = db_2022_2$cluster
)
gabung <- merge(
  x = hasil_2020_2,
  y = hasil_2022_2,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 1 1
## 2 Bali 1 1
## 3 Banten 1 1
## 4 Bengkulu 1 1
## 5 DI Yogyakarta 0 1
## 6 DKI Jakarta 1 1
## 7 Gorontalo 1 1
## 8 Jambi 1 1
## 9 Jawa Barat 1 1
## 10 Jawa Tengah 1 1
## 11 Jawa Timur 1 1
## 12 Kalimantan Barat 1 1
## 13 Kalimantan Selatan 1 1
## 14 Kalimantan Tengah 1 1
## 15 Kalimantan Timur 1 1
## 16 Kalimantan Utara 1 1
## 17 Kepulauan Bangka Belitung 1 1
## 18 Kepulauan Riau 1 1
## 19 Lampung 1 1
## 20 Maluku 1 1
## 21 Maluku Utara 1 1
## 22 Nusa Tenggara Barat 0 0
## 23 Nusa Tenggara Timur 1 1
## 24 Papua 0 0
## 25 Papua Barat 1 1
## 26 Riau 1 1
## 27 Sulawesi Barat 1 1
## 28 Sulawesi Selatan 1 1
## 29 Sulawesi Tengah 1 1
## 30 Sulawesi Tenggara 1 1
## 31 Sulawesi Utara 1 1
## 32 Sumatera Barat 1 1
## 33 Sumatera Selatan 1 1
## 34 Sumatera Utara 1 1
# Per-label mean profile of every indicator for the DBSCAN (_2) results.
# NOTE(review): the "C0" row averages the points labelled 0, which are
# presumably DBSCAN noise rather than a real cluster -- confirm before
# interpreting that row as a group profile.

# Labels are attached by row position; fail loudly if the lengths disagree
# instead of letting cbind() recycle or misalign them.
stopifnot(
  length(db_2020_2$cluster) == nrow(data2020),
  length(db_2022_2$cluster) == nrow(data2022)
)

data2020_db_2 <- cbind(data2020[, -1], Cluster = db_2020_2$cluster)
data2022_db_2 <- cbind(data2022[, -1], Cluster = db_2022_2$cluster)

# across() replaces the superseded summarise_all(); output is unchanged.
summary2020_db_2 <- data2020_db_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_db_2 <- data2022_db_2 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

knitr::kable(summary2020_db_2)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C0 | 2488730 | 86.86333 | 8.330000 | 69.55333 | 17.630000 | 0.4000000 | 1270483 | 63.04333 | 2167.540 | 69.09667 | 6.410000 |
| C1 | 2694711 | 97.27161 | 9.155161 | 71.22871 | 9.729677 | 0.3441935 | 1250845 | 60.76935 | 2069.845 | 70.13226 | 9.377419 |
# Render the 2022 DBSCAN (_2) per-label means as a markdown table.
summary2022_db_2 %>%
  knitr::kable()
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C0 | 3195905 | 85.08000 | 7.315000 | 65.42500 | 40.43000 | 0.7730000 | 1307491 | 59.94264 | 2181.239 | 66.65000 | 3.760000 |
| C1 | 2855905 | 97.41219 | 8.934375 | 72.37719 | 19.29938 | 0.6817812 | 1339998 | 61.50745 | 2037.208 | 70.65344 | 5.208125 |
# Heatmaps of the per-label mean profiles for the DBSCAN (_2) results.
# Row names carry the label key (C0 / C1) so the heatmap rows are labelled.
mat2020_db_2 <- as.matrix(summary2020_db_2[, -1])
rownames(mat2020_db_2) <- summary2020_db_2$Cluster
mat2022_db_2 <- as.matrix(summary2022_db_2[, -1])
rownames(mat2022_db_2) <- summary2022_db_2$Cluster

# scale = "column" standardises each indicator so columns measured in
# millions do not swamp the colour map. With only two rows this reduces each
# column to a high/low contrast between the two groups.
pheatmap(
  mat2020_db_2,
  scale = "column",
  main = "Profil Cluster DBSCAN - 2020",
  cluster_rows = FALSE
)
pheatmap(
  mat2022_db_2,
  scale = "column",
  main = "Profil Cluster DBSCAN - 2022",
  cluster_rows = FALSE
)
# Label counts for the DBSCAN (_4) results.
# NOTE(review): label 0 is presumably the DBSCAN noise label (as in
# dbscan::dbscan()) rather than a cluster -- confirm where db_*_4 is fitted.
table(db_2020_4$cluster, dnn = "DBSCAN2020")
## DBSCAN2020
## 0 1
## 3 31
table(db_2022_4$cluster, dnn = "DBSCAN2022")
## DBSCAN2022
## 0 1
## 2 32
# Side-by-side label per province for both years, joined on the province name.
hasil_2020_4 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = db_2020_4$cluster
)
hasil_2022_4 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = db_2022_4$cluster
)
gabung <- merge(
  x = hasil_2020_4,
  y = hasil_2022_4,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 1 1
## 2 Bali 1 1
## 3 Banten 1 1
## 4 Bengkulu 1 1
## 5 DI Yogyakarta 0 1
## 6 DKI Jakarta 1 1
## 7 Gorontalo 1 1
## 8 Jambi 1 1
## 9 Jawa Barat 1 1
## 10 Jawa Tengah 1 1
## 11 Jawa Timur 1 1
## 12 Kalimantan Barat 1 1
## 13 Kalimantan Selatan 1 1
## 14 Kalimantan Tengah 1 1
## 15 Kalimantan Timur 1 1
## 16 Kalimantan Utara 1 1
## 17 Kepulauan Bangka Belitung 1 1
## 18 Kepulauan Riau 1 1
## 19 Lampung 1 1
## 20 Maluku 1 1
## 21 Maluku Utara 1 1
## 22 Nusa Tenggara Barat 0 0
## 23 Nusa Tenggara Timur 1 1
## 24 Papua 0 0
## 25 Papua Barat 1 1
## 26 Riau 1 1
## 27 Sulawesi Barat 1 1
## 28 Sulawesi Selatan 1 1
## 29 Sulawesi Tengah 1 1
## 30 Sulawesi Tenggara 1 1
## 31 Sulawesi Utara 1 1
## 32 Sumatera Barat 1 1
## 33 Sumatera Selatan 1 1
## 34 Sumatera Utara 1 1
# Per-label mean profile of every indicator for the DBSCAN (_4) results.
# NOTE(review): the "C0" row averages the points labelled 0, which are
# presumably DBSCAN noise rather than a real cluster -- confirm before
# interpreting that row as a group profile.

# Labels are attached by row position; fail loudly if the lengths disagree
# instead of letting cbind() recycle or misalign them.
stopifnot(
  length(db_2020_4$cluster) == nrow(data2020),
  length(db_2022_4$cluster) == nrow(data2022)
)

data2020_db_4 <- cbind(data2020[, -1], Cluster = db_2020_4$cluster)
data2022_db_4 <- cbind(data2022[, -1], Cluster = db_2022_4$cluster)

# across() replaces the superseded summarise_all(); output is unchanged.
summary2020_db_4 <- data2020_db_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_db_4 <- data2022_db_4 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

knitr::kable(summary2020_db_4)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C0 | 2488730 | 86.86333 | 8.330000 | 69.55333 | 17.630000 | 0.4000000 | 1270483 | 63.04333 | 2167.540 | 69.09667 | 6.410000 |
| C1 | 2694711 | 97.27161 | 9.155161 | 71.22871 | 9.729677 | 0.3441935 | 1250845 | 60.76935 | 2069.845 | 70.13226 | 9.377419 |
# Render the 2022 DBSCAN (_4) per-label means as a markdown table.
summary2022_db_4 %>%
  knitr::kable()
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C0 | 3195905 | 85.08000 | 7.315000 | 65.42500 | 40.43000 | 0.7730000 | 1307491 | 59.94264 | 2181.239 | 66.65000 | 3.760000 |
| C1 | 2855905 | 97.41219 | 8.934375 | 72.37719 | 19.29938 | 0.6817812 | 1339998 | 61.50745 | 2037.208 | 70.65344 | 5.208125 |
# Heatmaps of the per-label mean profiles for the DBSCAN (_4) results.
# Row names carry the label key (C0 / C1) so the heatmap rows are labelled.
mat2020_db_4 <- as.matrix(summary2020_db_4[, -1])
rownames(mat2020_db_4) <- summary2020_db_4$Cluster
mat2022_db_4 <- as.matrix(summary2022_db_4[, -1])
rownames(mat2022_db_4) <- summary2022_db_4$Cluster

# scale = "column" standardises each indicator so columns measured in
# millions do not swamp the colour map. With only two rows this reduces each
# column to a high/low contrast between the two groups.
pheatmap(
  mat2020_db_4,
  scale = "column",
  main = "Profil Cluster DBSCAN - 2020",
  cluster_rows = FALSE
)
pheatmap(
  mat2022_db_4,
  scale = "column",
  main = "Profil Cluster DBSCAN - 2022",
  cluster_rows = FALSE
)
# Label counts for the DBSCAN (_6) results.
# NOTE(review): label 0 is presumably the DBSCAN noise label (as in
# dbscan::dbscan()) rather than a cluster -- confirm where db_*_6 is fitted.
table(db_2020_6$cluster, dnn = "DBSCAN2020")
## DBSCAN2020
## 0 1
## 4 30
table(db_2022_6$cluster, dnn = "DBSCAN2022")
## DBSCAN2022
## 0 1
## 4 30
# Side-by-side label per province for both years, joined on the province name.
hasil_2020_6 <- data.frame(
  Provinsi = data2020[["Provinsi"]],
  Cluster = db_2020_6$cluster
)
hasil_2022_6 <- data.frame(
  Provinsi = data2022[["Provinsi"]],
  Cluster = db_2022_6$cluster
)
gabung <- merge(
  x = hasil_2020_6,
  y = hasil_2022_6,
  by = "Provinsi",
  suffixes = c("_2020", "_2022")
)
gabung
## Provinsi Cluster_2020 Cluster_2022
## 1 Aceh 1 1
## 2 Bali 1 1
## 3 Banten 1 1
## 4 Bengkulu 1 1
## 5 DI Yogyakarta 0 1
## 6 DKI Jakarta 1 0
## 7 Gorontalo 1 1
## 8 Jambi 1 1
## 9 Jawa Barat 1 1
## 10 Jawa Tengah 1 1
## 11 Jawa Timur 1 1
## 12 Kalimantan Barat 1 1
## 13 Kalimantan Selatan 1 1
## 14 Kalimantan Tengah 1 1
## 15 Kalimantan Timur 1 1
## 16 Kalimantan Utara 1 1
## 17 Kepulauan Bangka Belitung 1 1
## 18 Kepulauan Riau 1 0
## 19 Lampung 1 1
## 20 Maluku 1 1
## 21 Maluku Utara 1 1
## 22 Nusa Tenggara Barat 0 0
## 23 Nusa Tenggara Timur 1 1
## 24 Papua 0 0
## 25 Papua Barat 0 1
## 26 Riau 1 1
## 27 Sulawesi Barat 1 1
## 28 Sulawesi Selatan 1 1
## 29 Sulawesi Tengah 1 1
## 30 Sulawesi Tenggara 1 1
## 31 Sulawesi Utara 1 1
## 32 Sumatera Barat 1 1
## 33 Sumatera Selatan 1 1
## 34 Sumatera Utara 1 1
# Per-label mean profile of every indicator for the DBSCAN (_6) results.
# NOTE(review): the "C0" row averages the points labelled 0, which are
# presumably DBSCAN noise rather than a real cluster -- confirm before
# interpreting that row as a group profile.

# Labels are attached by row position; fail loudly if the lengths disagree
# instead of letting cbind() recycle or misalign them.
stopifnot(
  length(db_2020_6$cluster) == nrow(data2020),
  length(db_2022_6$cluster) == nrow(data2022)
)

data2020_db_6 <- cbind(data2020[, -1], Cluster = db_2020_6$cluster)
data2022_db_6 <- cbind(data2022[, -1], Cluster = db_2022_6$cluster)

# across() replaces the superseded summarise_all(); output is unchanged.
summary2020_db_6 <- data2020_db_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

summary2022_db_6 <- data2022_db_6 %>%
  mutate(Cluster = paste0("C", Cluster)) %>%
  group_by(Cluster) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))

knitr::kable(summary2020_db_6)
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C0 | 2662604 | 89.52750 | 8.7475 | 68.43750 | 18.565000 | 0.395 | 1301616 | 61.07250 | 2101.332 | 68.32750 | 7.8675 |
| C1 | 2678394 | 97.26333 | 9.1270 | 71.43333 | 9.341667 | 0.343 | 1246039 | 60.95633 | 2075.417 | 70.26933 | 9.2820 |
# Render the 2022 DBSCAN (_6) per-label means as a markdown table.
summary2022_db_6 %>%
  knitr::kable()
| Cluster | UMP (Rp/bulan) | Melek Huruf (%) | RLS (%) | IPM (%) | Penduduk Miskin (%) | Gini Ratio | Pengeluaran Kapita (Rp/bulan) | Konsumsi Protein (g/hari) | Konsumsi Kalori (kkal/hari) | Estimasi Harapan Hidup | TPT (%) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| C0 | 4028967 | 92.21750 | 9.077500 | 72.240 | 25.6075 | 0.7620000 | 1743008 | 63.17603 | 2132.304 | 69.28000 | 5.885000 |
| C1 | 2722164 | 97.28267 | 8.807333 | 71.932 | 19.8670 | 0.6771667 | 1284096 | 61.18065 | 2034.130 | 70.56967 | 5.021333 |
# Heatmaps of the per-label mean profiles for the DBSCAN (_6) results.
# Row names carry the label key (C0 / C1) so the heatmap rows are labelled.
mat2020_db_6 <- as.matrix(summary2020_db_6[, -1])
rownames(mat2020_db_6) <- summary2020_db_6$Cluster
mat2022_db_6 <- as.matrix(summary2022_db_6[, -1])
rownames(mat2022_db_6) <- summary2022_db_6$Cluster

# scale = "column" standardises each indicator so columns measured in
# millions do not swamp the colour map. With only two rows this reduces each
# column to a high/low contrast between the two groups.
pheatmap(
  mat2020_db_6,
  scale = "column",
  main = "Profil Cluster DBSCAN - 2020",
  cluster_rows = FALSE
)
pheatmap(
  mat2022_db_6,
  scale = "column",
  main = "Profil Cluster DBSCAN - 2022",
  cluster_rows = FALSE
)