Prof. Kismiantini, S.Si., M.Si., Ph.D.
## Warning: package 'psych' was built under R version 4.5.2
## corrplot 0.95 loaded
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'lubridate' was built under R version 4.5.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.5
## ✔ lubridate 1.9.4 ✔ stringr 1.5.2
## ✔ purrr 1.1.0 ✔ tibble 3.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
## Warning: package 'scatterplot3d' was built under R version 4.5.2
## Warning: package 'factoextra' was built under R version 4.5.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Warning: package 'sf' was built under R version 4.5.2
## Linking to GEOS 3.13.1, GDAL 3.11.4, PROJ 9.7.0; sf_use_s2() is TRUE
## 'data.frame': 119 obs. of 6 variables:
## $ Wilayah: chr " Pandeglang" " Lebak" " Tangerang" " Serang" ...
## $ IKK : num 1.21 1.35 1.06 0.56 0.72 0.42 0.77 0.32 2.32 2.11 ...
## $ IPM : num 65.8 64.7 73 67.8 78.9 ...
## $ TPT : num 9.24 8.55 7.88 10.61 7.16 ...
## $ PPK : int 8827 8854 12427 10916 14909 13185 13709 15997 10511 16002 ...
## $ RLS : num 7.13 6.59 8.92 7.78 10.84 ...
## Wilayah IKK IPM TPT
## Length:119 Min. :0.320 Min. :63.39 Min. : 1.360
## Class :character 1st Qu.:0.930 1st Qu.:69.78 1st Qu.: 4.495
## Mode :character Median :1.330 Median :72.97 Median : 5.920
## Mean :1.425 Mean :73.71 Mean : 6.124
## 3rd Qu.:1.735 3rd Qu.:76.77 3rd Qu.: 7.830
## Max. :3.720 Max. :87.69 Max. :10.780
## PPK RLS
## Min. : 4173 Min. : 6.120
## 1st Qu.: 9974 1st Qu.: 7.755
## Median :12379 Median : 9.620
## Mean :12546 Mean :10.184
## 3rd Qu.:15105 3rd Qu.:12.710
## Max. :24221 Max. :15.760
dat_long <- dat %>%
pivot_longer(
cols = c(IKK, IPM, TPT, PPK, RLS),
names_to = "Variable",
values_to = "Value"
)
ggplot(dat_long, aes(x = Value, fill = Variable)) +
geom_histogram(bins = 30, color = "white", alpha = 0.9) +
facet_wrap(~ Variable, scales = "free") +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Distribution of Variables",
x = "Value",
y = "Count"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 16),
strip.text = element_text(face = "bold"),
legend.position = "none"
)dat_long <- dat %>%
pivot_longer(
cols = c(IKK, IPM, TPT, PPK, RLS),
names_to = "Variable",
values_to = "Value"
)
ggplot(dat_long, aes(x = Variable, y = Value, fill = Variable)) +
geom_boxplot(alpha = 0.9, color = "black") +
facet_wrap(~ Variable, scales = "free") +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Boxplot of Variables",
x = "Variable",
y = "Value"
) +
theme_minimal(base_size = 13) +
theme(
plot.title = element_text(face = "bold", size = 16),
legend.position = "none",
strip.text = element_text(face = "bold")
)## IKK IPM TPT PPK RLS
## [1,] -0.3177316 -1.4805002 1.3674590 -0.9910274 -1.14343261
## [2,] -0.1103909 -1.6931774 1.0646813 -0.9838324 -1.34563686
## [3,] -0.5398823 -0.1385636 0.7706799 -0.0316934 -0.47316297
## [4,] -1.2803848 -1.1210193 1.9686261 -0.4343472 -0.90003861
## [5,] -1.0434240 0.9775211 0.4547380 0.6297141 0.24578546
## [6,] -1.4877255 0.0458821 0.8672177 0.1702997 0.05855931
max_dim <- 5
stress_values <- numeric(max_dim)
for (k in 1:max_dim) {
fit_k <- cmdscale(matriks_jarak, k = k) # MDS for dimensions k
disparities_k <- as.matrix(dist(fit_k)) #
stress_values[k] <- sqrt(sum((matriks_jarak - disparities_k)^2) / sum(matriks_jarak^2)) # Value of stress
}
stress_values## [1] 4.810098e-01 2.701668e-01 1.332582e-01 4.216117e-02 7.910995e-16
plot(1:max_dim, stress_values, type = "b", pch = 19, col = "blue",
xlab = "Dimensi", ylab = "Nilai STRESS",
main = "Grafik STRESS setiap Dimensi",
ylim = c(min(stress_values) - 0.01, max(stress_values) + 0.01))
abline(h = 0.01, col = "red", lty = 2)ggplot(dat, aes(x = MDS1, y = MDS2, label = Wilayah)) +
geom_point(size = 3, color = "blue") +
geom_text(vjust = -1, size = 3) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
labs(title = "MDS 2D Welfare Visualization",
x = "Dimensi 1",
y = "Dimensi 2") +
theme_minimal()stress_2d <- sqrt(sum((matriks_jarak - as.matrix(dist(fit[, 1:2])))^2) / sum(matriks_jarak^2))
stress_2d## [1] 0.2701668
plot_ly(dat, x = ~MDS1, y = ~MDS2, z = ~MDS3,
type = "scatter3d", mode = "markers+text",
text = ~Wilayah) %>%
layout(title = "MDS 3D Welfare Visualization",
scene = list(xaxis = list(title = "Dimensi 1"),
yaxis = list(title = "Dimensi 2"),
zaxis = list(title = "Dimensi 3")))stress_3d <- sqrt(sum((matriks_jarak - as.matrix(dist(fit[, 1:3])))^2) / sum(matriks_jarak^2))
stress_3d## [1] 0.1332582
#Elbow Method
fviz_nbclust(data_kmeans_mds, kmeans, method = "wss") +
geom_vline(xintercept = 5, linetype=2) +
labs(subtitle="Elbow Method")#Silhouette Method
fviz_nbclust(data_kmeans_mds, kmeans, method = "silhouette") +
ggtitle("Silhouette Method")set.seed(2025)
k_opt <- 5
kmeans_mds <- kmeans(data_kmeans_mds, centers = k_opt, nstart = 25)
kmeans_mds$centers## MDS1 MDS2 MDS3
## 1 0.71898761 -1.468218434 0.1263411
## 2 0.09762834 0.429710178 -1.1160768
## 3 1.87027041 0.850923370 -0.1085968
## 4 -2.37875976 -0.009646127 -0.0361262
## 5 -0.03983325 0.779586895 1.0269064
## [1] 1 1 1 1 4 1
## Levels: 1 2 3 4 5
ggplot(dat, aes(x = MDS1, y = MDS2, color = Cluster_MDS, label = Wilayah)) +
geom_point(size = 3) +
geom_text(vjust = -1, size = 3) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
labs(title = paste("Clustering Based Coordinate MDS 3D (k =", k_opt, ")"),
x = "Dimensi 1",
y = "Dimensi 2",
color = "Cluster") +
theme_minimal() +
theme(legend.position = "right")plot_ly(dat, x = ~MDS1, y = ~MDS2, z = ~MDS3,
type = "scatter3d", mode = "markers+text",
text = ~Wilayah,
color = ~Cluster_MDS) %>%
layout(
title = paste("Cluster MDS 3D (k =", k_opt, ")"),
scene = list(
xaxis = list(title = "Dimensi 1"),
yaxis = list(title = "Dimensi 2"),
zaxis = list(title = "Dimensi 3")
)
)cluster_list_mds <- split(dat$Wilayah, dat$Cluster_MDS)
for (k in names(cluster_list_mds)) {
cat("Cluster", k, ":\n")
print(cluster_list_mds[[k]])
cat("\n")
}## Cluster 1 :
## [1] " Pandeglang" " Lebak" " Tangerang" " Serang"
## [5] "Cilegon" "Kota Serang" "Kepulauan Seribu" "Bogor"
## [9] "Sukabumi" "Cianjur" "Bandung" "Garut"
## [13] "Kuningan" "Cirebon" "Sumedang" "Indramayu"
## [17] "Subang" "Purwakarta" "Karawang" "Bekasi"
## [21] "Bandung Barat" "Kota Bogor" "Kota Sukabumi" "Kota Cirebon"
## [25] "Cimahi" "Banjar" "Cilacap" "Batang"
## [29] "Tegal" "Brebes"
##
## Cluster 2 :
## [1] "Bantul" "Banyumas" "Purbalingga" "Purworejo"
## [5] "Magelang" "Boyolali" "Klaten" "Sukoharjo"
## [9] "Wonogiri" "Karanganyar" "Sragen" "Grobogan"
## [13] "Blora" "Rembang" "Pati" "Kudus"
## [17] "Jepara" "Demak" "Semarang" "Temanggung"
## [21] "Kendal" "Pekalongan" "Kota Pekalongan" "Kota Tegal"
##
## Cluster 3 :
## [1] "Kulonprogo" "Gunungkidul" "Tasikmalaya" "Ciamis"
## [5] "Majalengka" "Pangandaran" "Kota Tasikmalaya" "Banjarnegara"
## [9] "Kebumen" "Wonosobo" "Pemalang" "Probolinggo"
## [13] "Tuban" "Bangkalan" "Sampang" "Pamekasan"
## [17] "Sumenep"
##
## Cluster 4 :
## [1] "Kota Tangerang" "Tangerang Selatan" "Sleman"
## [4] "Yogyakarta" "Jakarta Selatan" "Jakarta Timur"
## [7] "Jakarta Pusat" "Jakarta Barat" "Jakarta Utara"
## [10] "Kota Bandung" "Kota Bekasi" "Depok"
## [13] "Kota Magelang" "Surakarta" "Salatiga"
## [16] "Kota Semarang" "Sidoarjo" "Kota Blitar"
## [19] "Kota Malang" "Kota Mojokerto" "Kota Madiun"
## [22] "Surabaya" "Batu"
##
## Cluster 5 :
## [1] "Pacitan" "Ponorogo" "Trenggalek" "Tulungagung"
## [5] "Blitar" "Kediri" "Malang" "Lumajang"
## [9] "Jember" "Banyuwangi" "Bondowoso" "Situbondo"
## [13] "Pasuruan" "Mojokerto" "Jombang" "Nganjuk"
## [17] "Madiun" "Magetan" "Ngawi" "Bojonegoro"
## [21] "Lamongan" "Gresik" "Kota Kediri" "Kota Probolinggo"
## [25] "Kota Pasuruan"
aggregate(dat[, c("IKK", "IPM", "TPT", "PPK", "RLS")], by = list(Cluster = dat$Cluster_MDS), FUN = mean)## Reading layer `gadm41_IDN_2' from data source `D:\stat\gadm41_IDN_2.shp' using driver `ESRI Shapefile'
## Simple feature collection with 502 features and 13 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 95.00971 ymin: -11.00761 xmax: 141.0194 ymax: 6.076941
## Geodetic CRS: WGS 84
jawa <- indo %>%
filter(NAME_1 %in% c("Banten","Jakarta Raya","Jawa Barat",
"Jawa Tengah","Jawa Timur","DI Yogyakarta"))
jawa <- jawa %>%
mutate(
Wilayah_raw = NAME_2,
Wilayah = NAME_2,
Wilayah = str_replace(Wilayah, "Kabupaten ", "Kab "),
Wilayah = str_replace(Wilayah, "Kota ", "Kota "),
Wilayah = str_trim(Wilayah)
)
dat <- dat %>%
mutate(
Wilayah = str_replace(Wilayah, "Kab ", "Kab "),
Wilayah = str_replace(Wilayah, "Kota ", "Kota "),
Wilayah = str_trim(Wilayah)
)
map_cluster <- jawa %>% left_join(dat, by = "Wilayah")
ggplot(map_cluster) +
geom_sf(aes(fill = as.factor(Cluster_MDS)),
color = "white",
size = 0.2) +
scale_fill_brewer(palette = "Set2", na.value = "grey80") +
labs(
title = "Cluster Map Kabupaten/Kota in Java and Madura ",
subtitle = "Based on MDS Result",
fill = "Cluster"
) +
theme_minimal(base_size = 12) +
theme(
plot.title = element_text(face = "bold", size = 16),
legend.position = "right"
)