if (!require(fclust)) {
install.packages("fclust")
}
## Loading required package: fclust
## Warning: package 'fclust' was built under R version 4.3.3
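As an aside, a compact sketch (not part of the original run) that installs any missing packages used later in this document before loading them:
# Sketch: install any packages that are missing, then load everything in one pass
pkgs <- c("fclust", "e1071", "cluster", "frbs", "kernlab", "tidyverse",
          "factoextra", "DataExplorer", "ggpubr", "tictoc", "skimr", "psych",
          "ppclust", "cowplot", "Rtsne", "sf")
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) install.packages(missing_pkgs)
invisible(lapply(pkgs, library, character.only = TRUE))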
library(fclust)
library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
## Registered S3 method overwritten by 'e1071':
## method from
## print.fclust fclust
library(cluster)
## Warning: package 'cluster' was built under R version 4.3.3
library(frbs)
## Warning: package 'frbs' was built under R version 4.3.3
library("kernlab")
## Warning: package 'kernlab' was built under R version 4.3.3
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::alpha() masks kernlab::alpha()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.3.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("cluster")
library("DataExplorer")
## Warning: package 'DataExplorer' was built under R version 4.3.2
library("ggpubr")
## Warning: package 'ggpubr' was built under R version 4.3.3
library("tictoc")
## Warning: package 'tictoc' was built under R version 4.3.3
##
## Attaching package: 'tictoc'
##
## The following object is masked from 'package:kernlab':
##
## size
# Read all CSV files
x1 <- read.csv("x1_asi eksklusif (2).csv")
x2 <- read.csv("x2_persentase pendamping.csv")
x3 <- read.csv("x3_Persentase Imunisasi.csv")
x4 <- read.csv("x4_Jaminan.csv")
x5 <- read.csv("x5_sanitasi layak.csv")
x6 <- read.csv("x6_persentase air minum layak.csv")
x7 <- read.csv("x7_Gizi Percentage mean.csv")
kota <- read.csv("Summary_Cluster tableau.csv", sep = ";")
data <- data.frame("R102" = x1$R102, "x1" = round(x1$persentase_asi_eksklusif,2),
"x2" = round(x2$persentase_pendamping,2),
"x3" = round(x3$persentase_lengkap,2) ,
"x4" = round(x4$persentase_memiliki_jaminan,4),
"x5" = round(x5$Persentase,2),
"x6" = round(x6$Persentase,2),
"x7" = x7$Gizi.Percentage)
data_kota <- data.frame("R102" = x1$R102, "x1" = round(x1$persentase_asi_eksklusif,2),
"x2" = round(x2$persentase_pendamping,2),
"x3" = round(x3$persentase_lengkap,2) ,
"x4" = round(x4$persentase_memiliki_jaminan,4),
"x5" = round(x5$Persentase,2),
"x6" = round(x6$Persentase,2),
"x7" = x7$Gizi.Percentage,
"kota" = kota$Kabupaten.kota)
data
## R102 x1 x2 x3 x4 x5 x6 x7
## 1 1 96.52 98.63 0.07 57.7483 77.28 83.93 3.60
## 2 2 92.39 96.83 0.03 50.5455 81.35 68.83 3.89
## 3 3 93.33 90.12 0.09 31.6522 69.67 73.93 3.86
## 4 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
## 5 5 92.59 98.39 0.03 36.6951 56.78 77.68 4.10
## 6 6 91.01 88.14 0.07 32.8277 56.01 69.94 3.57
## 7 7 97.59 97.96 0.00 48.6842 83.12 84.47 3.23
## 8 8 95.52 92.11 0.04 57.5531 90.85 78.22 3.41
## 9 9 98.72 96.00 0.00 58.5742 89.78 80.58 4.00
## 10 10 92.86 100.00 0.00 44.9799 81.43 85.93 3.72
## 11 11 85.07 91.67 0.04 55.1300 92.64 88.85 3.66
## 12 12 91.07 96.15 0.03 44.4327 95.67 80.38 3.72
## 13 13 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## 14 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
## 15 15 89.47 96.77 0.09 61.7424 81.73 73.53 3.86
## 16 16 88.79 95.59 0.03 82.6220 89.73 81.69 3.71
## 17 17 91.75 96.23 0.03 41.9298 86.62 82.19 3.45
## 18 18 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## 19 71 96.92 92.31 0.04 81.3657 75.15 87.52 3.90
## 20 72 95.00 94.29 0.00 66.0589 45.59 87.69 3.94
## 21 73 96.88 92.50 0.06 73.2582 55.68 83.71 3.78
## 22 74 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## 23 75 88.61 92.16 0.05 82.0101 98.30 79.68 3.80
## 24 76 94.44 89.13 0.03 76.8707 97.78 90.94 3.55
## 25 77 84.91 96.67 0.12 73.5336 76.25 87.10 3.56
## 26 78 93.44 95.00 0.08 57.4074 56.76 87.43 3.97
## 27 79 93.02 91.30 0.00 81.3433 90.18 88.42 3.58
plot_intro(data,ggtheme = theme_pubr())
## Distribution
plot_density(data[-1],binary_as_factor = FALSE,
geom_density_args = list(fill="red"),
nrow = 3,
ncol = 3,
ggtheme = theme_pubr(base_size = 9))
# Boxplot of each predictor
tmp <- par(mfrow=c(3,4))
names <- colnames(data[-1])
for(i in names){
boxplot(data[-1][[i]], horizontal=T, col='red')
title(i)
}
par(tmp)
## Outlier
# Check a column for outliers; here 'data' is the data frame and x3 the column being checked
# Compute Q1 (first quartile) and Q3 (third quartile)
Q1 <- quantile(data$x3, 0.25)
Q3 <- quantile(data$x3, 0.75)
# Compute the IQR
IQR_value <- IQR(data$x3)
# Lower and upper bounds for outliers
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value
# Extract the outliers
outliers <- data[data$x3 < lower_bound | data$x3 > upper_bound, ]
# View the outliers
print(outliers)
## R102 x1 x2 x3 x4 x5 x6 x7
## 14 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
## 22 74 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
# Repeat the outlier check for x5
# Compute Q1 (first quartile) and Q3 (third quartile)
Q1 <- quantile(data$x5, 0.25)
Q3 <- quantile(data$x5, 0.75)
# Compute the IQR
IQR_value <- IQR(data$x5)
# Lower and upper bounds for outliers
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value
# Extract the outliers
outliers <- data[data$x5 < lower_bound | data$x5 > upper_bound, ]
# View the outliers
print(outliers)
## R102 x1 x2 x3 x4 x5 x6 x7
## 20 72 95 94.29 0 66.0589 45.59 87.69 3.94
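The same 1.5 * IQR rule can be applied to every predictor at once. A minimal sketch (the helper name iqr_outliers is ours, not part of the original analysis):
# Sketch: flag IQR-rule outliers for each predictor and list the affected rows
iqr_outliers <- function(df, col) {
  q <- quantile(df[[col]], c(0.25, 0.75))
  iqr <- q[2] - q[1]
  df[df[[col]] < q[1] - 1.5 * iqr | df[[col]] > q[2] + 1.5 * iqr, ]
}
lapply(setNames(nm = paste0("x", 1:7)), function(v) iqr_outliers(data, v))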
library(skimr)
## Warning: package 'skimr' was built under R version 4.3.2
skim_without_charts(data = data[-1])
Name | data[-1] |
Number of rows | 27 |
Number of columns | 7 |
_______________________ | |
Column type frequency: | |
numeric | 7 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 |
---|---|---|---|---|---|---|---|---|---|
x1 | 0 | 1 | 92.10 | 3.88 | 84.91 | 89.13 | 92.59 | 94.80 | 98.72 |
x2 | 0 | 1 | 94.22 | 3.26 | 88.14 | 91.89 | 95.00 | 96.55 | 100.00 |
x3 | 0 | 1 | 0.05 | 0.05 | 0.00 | 0.03 | 0.04 | 0.07 | 0.20 |
x4 | 0 | 1 | 59.43 | 16.30 | 31.65 | 46.83 | 57.75 | 73.40 | 86.89 |
x5 | 0 | 1 | 80.31 | 14.78 | 45.59 | 75.10 | 83.12 | 90.52 | 98.30 |
x6 | 0 | 1 | 81.86 | 6.38 | 68.83 | 78.66 | 82.19 | 87.26 | 90.94 |
x7 | 0 | 1 | 3.70 | 0.23 | 3.18 | 3.58 | 3.72 | 3.88 | 4.10 |
data_kota$kota[data_kota$x1 == min(data_kota$x1)]
## [1] "Kota Cimahi"
data_kota$kota[data_kota$x2 == min(data_kota$x2)]
## [1] "Tasikmalaya"
data_kota$kota[data_kota$x3 == min(data_kota$x3)]
## [1] "Ciamis" "Cirebon" "Majalengka" "Pangandaran"
## [5] "Kota Sukabumi" "Kota Banjar"
data_kota$kota[data_kota$x4 == min(data_kota$x4)]
## [1] "Cianjur"
data_kota$kota[data_kota$x5 == min(data_kota$x5)]
## [1] "Kota Sukabumi"
data_kota$kota[data_kota$x6 == min(data_kota$x6)]
## [1] "Sukabumi"
data_kota$kota[data_kota$x7 == min(data_kota$x7)]
## [1] "Pangandaran"
data_kota$kota[data_kota$x1 == max(data_kota$x1)]
## [1] "Cirebon"
data_kota$kota[data_kota$x2 == max(data_kota$x2)]
## [1] "Majalengka"
data_kota$kota[data_kota$x3 == max(data_kota$x3)]
## [1] "Kota Cirebon"
data_kota$kota[data_kota$x4 == max(data_kota$x4)]
## [1] "Kota Cirebon"
data_kota$kota[data_kota$x5 == max(data_kota$x5)]
## [1] "Kota Bekasi"
data_kota$kota[data_kota$x6 == max(data_kota$x6)]
## [1] "Kota Depok"
data_kota$kota[data_kota$x7 == max(data_kota$x7)]
## [1] "Garut"
# District/city attaining the minimum of each variable
min_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) data_kota$kota[which.min(x)])
# District/city attaining the maximum of each variable
max_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) data_kota$kota[which.max(x)])
# Build a tidy, informative data frame
result_df <- data.frame(
Variable = paste0("x", 1:7),
Min_Kota = min_kota,
Max_Kota = max_kota
)
# Handle ties: collapse every district/city that attains the minimum (note: 'data' has no 'kota' column, so data_kota is used here)
min_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) paste(data_kota$kota[which(x == min(x))], collapse = ", "))
# Handle ties: collapse every district/city that attains the maximum
max_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) paste(data_kota$kota[which(x == max(x))], collapse = ", "))
# Build a tidy, informative data frame
result_df <- data.frame(
Min_Kota = min_kota,
Max_Kota = max_kota
)
# Use descriptive row names
rownames(result_df) <- c("Persentase_ASI_Eksklusif",
"Persentase_MP_ASI",
"Persentase_Imunisasi_Lengkap",
"Persentase_Jaminan_Kesehatan",
"Persentase_Akses_Sanitasi_Layak",
"Persentase_Akses_Air_Layak",
"Persentase_Konsumsi_Gizi")
# Display the result
print(result_df)
##                                                                                              Min_Kota     Max_Kota
## Persentase_ASI_Eksklusif                                                                  Kota Cimahi      Cirebon
## Persentase_MP_ASI                                                                         Tasikmalaya   Majalengka
## Persentase_Imunisasi_Lengkap    Ciamis, Cirebon, Majalengka, Pangandaran, Kota Sukabumi, Kota Banjar Kota Cirebon
## Persentase_Jaminan_Kesehatan                                                                  Cianjur Kota Cirebon
## Persentase_Akses_Sanitasi_Layak                                                         Kota Sukabumi  Kota Bekasi
## Persentase_Akses_Air_Layak                                                                   Sukabumi   Kota Depok
## Persentase_Konsumsi_Gizi                                                                  Pangandaran        Garut
# Correlation heatmap (Spearman)
plot_correlation(data = data[-1],
type = "all",
cor_args = list(method="spearman"),
ggtheme = theme_classic(),
theme_config = list(legend.position = "none",
axis.text.x=element_text(angle = 90)))
None of the predictors are strongly correlated with one another.
require(psych)
## Loading required package: psych
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:kernlab':
##
## alpha
pairs.panels(data[-1], method = "spearman")
# column wise missing values
colSums(is.na(data))
## R102 x1 x2 x3 x4 x5 x6 x7
## 0 0 0 0 0 0 0 0
# Handle missing values
data <- na.omit(data)
There are no missing values.
data_for_clustering <- data[, c("x1", "x2", "x3", "x4","x5","x6","x7")]
tic()
set.seed(123)
fviz_nbclust(
x = data_for_clustering,
FUNcluster = hcut,
method = "wss",
hc_method = "complete",
hc_func = "hclust",
k.max = 25
)
The last sharp drop in the total within-cluster sum of squares occurs between 3 and 4 clusters, so 4 clusters is likely the optimal number.
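The exact WSS values behind the elbow plot can be read from the ggplot object returned by fviz_nbclust. A minimal sketch, assuming the same data and hierarchical clustering setup as above:
# Sketch: inspect the WSS values underlying the elbow plot
p_wss <- fviz_nbclust(data_for_clustering, FUNcluster = hcut, method = "wss",
                      hc_method = "complete", k.max = 10)
p_wss$data   # number of clusters and the corresponding total within-cluster sum of squares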
metode_agg <- c("single","complete",
"average","ward.D","median","centroid",
"mcquitty")
map(metode_agg, function(i)
fviz_nbclust(
x = data_for_clustering,
FUNcluster = hcut,
method = "silhouette",
hc_method = i,
hc_func = "hclust",
k.max = 25
)+
ggtitle(str_c("Optimal number of clusters based on HC with ",i," linkage"))
)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
All of the silhouette plots point to an optimum at 4 clusters.
For these two reasons, 4 clusters is chosen as the optimal number.
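As a numeric cross-check (a sketch, not part of the original run), the average silhouette width at k = 4 can be computed directly for each linkage:
# Sketch: average silhouette width at k = 4 for every linkage method tested above
sil_at_4 <- sapply(metode_agg, function(met) {
  hc <- hcut(data_for_clustering, k = 4, hc_func = "hclust", hc_method = met)
  mean(silhouette(hc$cluster, dist(data_for_clustering))[, "sil_width"])
})
round(sil_at_4, 3)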
library(ppclust)
## Warning: package 'ppclust' was built under R version 4.3.3
##
## Attaching package: 'ppclust'
## The following object is masked from 'package:psych':
##
## pca
f_cmeans <- fcm(data_for_clustering, centers=4, m = 2)
as.data.frame(f_cmeans$u)
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.58502272 0.13347114 0.16885106 0.112655073
## 2 0.65372671 0.08732070 0.08862539 0.170327195
## 3 0.08214835 0.02110022 0.03964624 0.857105182
## 4 0.54088852 0.08578773 0.14243250 0.230891241
## 5 0.06208661 0.02228773 0.07253836 0.843087294
## 6 0.05161885 0.02021903 0.05304843 0.875113689
## 7 0.82383695 0.05169384 0.05110991 0.073359299
## 8 0.78520483 0.11751085 0.04731261 0.049971717
## 9 0.74203820 0.14307740 0.05932692 0.055557476
## 10 0.73268206 0.06298265 0.07280334 0.131531945
## 11 0.65676884 0.18779989 0.07630671 0.079124560
## 12 0.79054528 0.07882846 0.04776771 0.082858557
## 13 0.78232484 0.06294722 0.04910546 0.105622483
## 14 0.56893551 0.25165701 0.08583475 0.093572731
## 15 0.58261147 0.19297657 0.12276917 0.101642785
## 16 0.03686169 0.92659219 0.02443773 0.012108398
## 17 0.80036653 0.04981330 0.04594886 0.103871320
## 18 0.29024945 0.52969310 0.11508916 0.064968285
## 19 0.14261944 0.54197636 0.25096090 0.064443306
## 20 0.04277214 0.03681621 0.86104995 0.059361693
## 21 0.03712983 0.04121985 0.88704333 0.034606991
## 22 0.05326938 0.89054334 0.03722535 0.018961925
## 23 0.07772950 0.85842268 0.03935149 0.024496339
## 24 0.09283683 0.83661374 0.04401085 0.026538569
## 25 0.21113591 0.46320116 0.24695258 0.078710352
## 26 0.06782784 0.03931397 0.79758463 0.095273562
## 27 0.01631766 0.96730174 0.01110831 0.005272283
summary(f_cmeans)
## Summary for 'f_cmeans'
##
## Number of data objects: 27
##
## Number of clusters: 4
##
## Crisp clustering vector:
## [1] 1 1 4 1 4 4 1 1 1 1 1 1 1 1 1 2 1 2 2 3 3 2 2 2 2 3 2
##
## Initial cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 98.72 96.00 0.00 58.5742 89.78 80.58 4.00
## Cluster 2 94.44 89.13 0.03 76.8707 97.78 90.94 3.55
## Cluster 3 96.88 92.50 0.06 73.2582 55.68 83.71 3.78
## Cluster 4 89.47 96.77 0.09 61.7424 81.73 73.53 3.86
##
## Final cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 92.33050 95.66647 0.04160781 51.31007 86.68345 80.49127 3.646332
## Cluster 2 91.46070 93.01288 0.05946008 79.71876 91.20032 85.63463 3.651362
## Cluster 3 94.81695 93.92280 0.04767975 66.13097 55.30646 85.92202 3.869817
## Cluster 4 92.23131 92.28385 0.06265749 35.35767 62.44590 74.31127 3.824791
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3
## Cluster 2 861.7083
## Cluster 3 1242.9403 1485.2203
## Cluster 4 891.6147 2924.0972 1142.1510
##
## Difference between the initial and final cluster prototypes
## x1 x2 x3 x4 x5 x6
## Cluster 1 -6.389501 -0.3335303 0.04160781 -7.264125 -3.0965511 -0.0887341
## Cluster 2 -2.979302 3.8828798 0.02946008 2.848063 -6.5796843 -5.3053701
## Cluster 3 -2.063047 1.4228019 -0.01232026 -7.127235 -0.3735438 2.2120176
## Cluster 4 2.761305 -4.4861466 -0.02734251 -26.384733 -19.2841002 0.7812713
## x7
## Cluster 1 -0.35366813
## Cluster 2 0.10136153
## Cluster 3 0.08981725
## Cluster 4 -0.03520912
##
## Root Mean Squared Deviations (RMSD): 18.47603
## Mean Absolute Deviation (MAD): 186.1208
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.58502272 0.13347114 0.16885106 0.1126551
## 2 0.65372671 0.08732070 0.08862539 0.1703272
## 3 0.08214836 0.02110022 0.03964624 0.8571052
## 4 0.54088852 0.08578773 0.14243250 0.2308912
## 5 0.06208661 0.02228773 0.07253836 0.8430873
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 23 0.07772950 0.85842268 0.03935149 0.024496339
## 24 0.09283683 0.83661374 0.04401085 0.026538569
## 25 0.21113591 0.46320116 0.24695258 0.078710352
## 26 0.06782784 0.03931397 0.79758463 0.095273562
## 27 0.01631766 0.96730174 0.01110831 0.005272283
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 13 0.5408885 0.5850227 0.6957656 0.7326821 0.7852048 0.8238370
## Cluster 2 8 0.4632012 0.5389055 0.7517930 0.8475182 0.8995556 0.9673017
## Cluster 3 3 0.7975846 0.8293173 0.8485593 0.8610499 0.8740466 0.8870433
## Cluster 4 3 0.8430873 0.8500962 0.8584354 0.8571052 0.8661094 0.8751137
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.6106878 0.4809171
##
## Within cluster sum of squares by cluster:
## 1 2 3 4
## 1875.0622 1118.6255 221.0829 223.7435
## (between_SS / total_SS = 75.68%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
tic()
library(ppclust)
f_cmeans_2 <- fcm(data_for_clustering, centers=4, m = 2, nstart = 10 )
as.data.frame(f_cmeans_2$u)
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.13347114 0.58502272 0.16885106 0.112655073
## 2 0.08732070 0.65372671 0.08862539 0.170327195
## 3 0.02110022 0.08214835 0.03964624 0.857105182
## 4 0.08578773 0.54088852 0.14243250 0.230891241
## 5 0.02228773 0.06208661 0.07253836 0.843087294
## 6 0.02021903 0.05161885 0.05304843 0.875113689
## 7 0.05169384 0.82383695 0.05110991 0.073359299
## 8 0.11751085 0.78520483 0.04731261 0.049971717
## 9 0.14307740 0.74203820 0.05932692 0.055557476
## 10 0.06298265 0.73268206 0.07280334 0.131531945
## 11 0.18779989 0.65676884 0.07630671 0.079124560
## 12 0.07882846 0.79054528 0.04776771 0.082858557
## 13 0.06294722 0.78232484 0.04910546 0.105622483
## 14 0.25165701 0.56893551 0.08583475 0.093572731
## 15 0.19297657 0.58261147 0.12276917 0.101642785
## 16 0.92659219 0.03686169 0.02443773 0.012108398
## 17 0.04981330 0.80036653 0.04594886 0.103871320
## 18 0.52969310 0.29024945 0.11508916 0.064968285
## 19 0.54197636 0.14261944 0.25096090 0.064443306
## 20 0.03681621 0.04277214 0.86104995 0.059361693
## 21 0.04121985 0.03712983 0.88704333 0.034606991
## 22 0.89054334 0.05326938 0.03722535 0.018961925
## 23 0.85842268 0.07772950 0.03935149 0.024496339
## 24 0.83661374 0.09283683 0.04401085 0.026538569
## 25 0.46320116 0.21113591 0.24695258 0.078710352
## 26 0.03931397 0.06782784 0.79758463 0.095273562
## 27 0.96730174 0.01631766 0.01110831 0.005272283
toc()
## 3.31 sec elapsed
summary(f_cmeans_2)
## Summary for 'f_cmeans_2'
##
## Number of data objects: 27
##
## Number of clusters: 4
##
## Crisp clustering vector:
## [1] 2 2 4 2 4 4 2 2 2 2 2 2 2 2 2 1 2 1 1 3 3 1 1 1 1 3 1
##
## Initial cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 93.33 90.12 0.09 31.6522 69.67 73.93 3.86
## Cluster 2 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## Cluster 3 97.59 97.96 0.00 48.6842 83.12 84.47 3.23
## Cluster 4 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
##
## Final cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 91.46070 93.01288 0.05946008 79.71876 91.20032 85.63463 3.651362
## Cluster 2 92.33050 95.66647 0.04160781 51.31007 86.68345 80.49127 3.646332
## Cluster 3 94.81695 93.92280 0.04767975 66.13097 55.30646 85.92202 3.869817
## Cluster 4 92.23131 92.28385 0.06265749 35.35767 62.44590 74.31127 3.824791
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3
## Cluster 2 861.7083
## Cluster 3 1485.2203 1242.9403
## Cluster 4 2924.0972 891.6147 1142.1510
##
## Difference between the initial and final cluster prototypes
## x1 x2 x3 x4 x5 x6
## Cluster 1 -1.869302 2.892880 -0.03053992 48.06656 21.5303157 11.704630
## Cluster 2 6.620499 4.756470 0.04160781 -16.18113 0.3734489 -9.888734
## Cluster 3 -2.773047 -4.037198 0.04767975 17.44677 -27.8135438 1.452018
## Cluster 4 4.581305 -1.596147 -0.12734251 -25.34053 -31.7141002 4.241271
## x7
## Cluster 1 -0.2086385
## Cluster 2 0.4663319
## Cluster 3 0.6398172
## Cluster 4 -0.1552091
##
## Root Mean Squared Deviations (RMSD): 39.1895
## Mean Absolute Deviation (MAD): 431.5449
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.13347114 0.58502272 0.16885106 0.1126551
## 2 0.08732070 0.65372671 0.08862539 0.1703272
## 3 0.02110022 0.08214836 0.03964624 0.8571052
## 4 0.08578773 0.54088852 0.14243250 0.2308912
## 5 0.02228773 0.06208661 0.07253836 0.8430873
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 23 0.85842268 0.07772950 0.03935149 0.024496339
## 24 0.83661374 0.09283683 0.04401085 0.026538569
## 25 0.46320116 0.21113591 0.24695258 0.078710352
## 26 0.03931397 0.06782784 0.79758463 0.095273562
## 27 0.96730174 0.01631766 0.01110831 0.005272283
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 8 0.4632012 0.5389055 0.7517930 0.8475182 0.8995556 0.9673017
## Cluster 2 13 0.5408885 0.5850227 0.6957656 0.7326821 0.7852048 0.8238370
## Cluster 3 3 0.7975846 0.8293173 0.8485593 0.8610499 0.8740466 0.8870433
## Cluster 4 3 0.8430873 0.8500962 0.8584354 0.8571052 0.8661094 0.8751137
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.6106878 0.4809171
##
## Within cluster sum of squares by cluster:
## 1 2 3 4
## 1118.6255 1875.0622 221.0829 223.7435
## (between_SS / total_SS = 75.68%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
res.fcm4 <- ppclust2(f_cmeans, "fclust")
idxsf_1 <- SIL.F(res.fcm4$Xca, res.fcm4$U, alpha=1)
idxpe_1 <- PE(res.fcm4$U)
idxpc_1 <- PC(res.fcm4$U)
idxmpc_1 <- MPC(res.fcm4$U)
res.fcm5 <- ppclust2(f_cmeans_2, "fclust")
idxsf_2 <- SIL.F(res.fcm5$Xca, res.fcm5$U, alpha=1)
idxpe_2 <- PE(res.fcm5$U)
idxpc_2 <- PC(res.fcm5$U)
idxmpc_2 <- MPC(res.fcm5$U)
data.frame("Partition Entropy" = c(idxpe_1,idxpe_2),
"Partition Coefficient" = c(idxpc_1,idxpc_2),
"Modified Partition Coefficient" = c(idxmpc_1,idxmpc_2),
"Fuzzy Silhouette Index" = c(idxsf_1,idxsf_2))
## Partition.Entropy Partition.Coefficient Modified.Partition.Coefficient
## 1 0.7693229 0.6106878 0.4809171
## 2 0.7693229 0.6106878 0.4809171
## Fuzzy.Silhouette.Index
## 1 0.7105211
## 2 0.7105211
Increasing nstart does not change the evaluation metrics; only the cluster labels are permuted.
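A quick way to confirm this (a sketch, assuming f_cmeans and f_cmeans_2 from above): the two membership matrices coincide once the columns of the second are matched to the first.
# Sketch: match the columns of the second membership matrix to the first, then compare
perm <- apply(cor(f_cmeans$u, f_cmeans_2$u), 1, which.max)
max(abs(f_cmeans$u - f_cmeans_2$u[, perm]))  # close to 0 if the solutions differ only by relabelling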
# library(ppclust)
# library(tictoc)
#
# # Initialise the values of m and the numbers of clusters to test
# m_values <- seq(1, 4, by = 0.2)
# cluster_values <- 2:6  # e.g. test between 2 and 6 clusters
#
# # Data frame to store the results of each iteration
# results <- data.frame(m = numeric(),
#                       clusters = numeric(),
#                       idxsf_2 = numeric(),
#                       idxpe_2 = numeric(),
#                       idxpc_2 = numeric(),
#                       idxmpc_2 = numeric())
#
# tic()
#
# # Loop over each number of clusters
# for (clusters in cluster_values) {
#   # Loop over each value of m
#   for (m_value in m_values) {
#     # Run fuzzy c-means clustering
#     f_cmeans_2 <- fcm(data_for_clustering, centers = clusters, m = m_value, nstart = 10)
#
#     # Convert the result to an 'fclust' object
#     res.fcm5 <- ppclust2(f_cmeans_2, "fclust")
#
#     # Compute the clustering validity indices
#     idxsf_2 <- SIL.F(res.fcm5$Xca, res.fcm5$U, alpha = 1)
#     idxpe_2 <- PE(res.fcm5$U)
#     idxpc_2 <- PC(res.fcm5$U)
#     idxmpc_2 <- MPC(res.fcm5$U)
#
#     # Append the results to the data frame
#     results <- rbind(results, data.frame(m = m_value,
#                                          clusters = clusters,
#                                          idxsf_2 = idxsf_2,
#                                          idxpe_2 = idxpe_2,
#                                          idxpc_2 = idxpc_2,
#                                          idxmpc_2 = idxmpc_2))
#   }
# }
#
# toc()  # Elapsed time
#
# # Display the results
# print(results)
library(ppclust)
library(tictoc)
# Initialise the values of m to test (the number of clusters is fixed at 4)
m_values <- seq(1, 4, by = 0.2)
# Data frame to store the results of each iteration
results <- data.frame(m = numeric(),
                      idxsf_2 = numeric(),
                      idxpe_2 = numeric(),
                      idxpc_2 = numeric(),
                      idxmpc_2 = numeric())
tic()
# Loop over each value of m
for (m_value in m_values) {
  # Run fuzzy c-means clustering with 4 clusters
  f_cmeans_2 <- fcm(data_for_clustering, centers = 4, m = m_value, nstart = 10)
  # Convert the result to an 'fclust' object
  res.fcm5 <- ppclust2(f_cmeans_2, "fclust")
  # Compute the clustering validity indices
  idxsf_2 <- SIL.F(res.fcm5$Xca, res.fcm5$U, alpha = 1)
  idxpe_2 <- PE(res.fcm5$U)
  idxpc_2 <- PC(res.fcm5$U)
  idxmpc_2 <- MPC(res.fcm5$U)
  # Append the results to the data frame
  results <- rbind(results, data.frame(m = m_value,
                                       idxsf_2 = idxsf_2,
                                       idxpe_2 = idxpe_2,
                                       idxpc_2 = idxpc_2,
                                       idxmpc_2 = idxmpc_2))
}
toc()  # Elapsed time
## 89.43 sec elapsed
# Display the results
print(results)
## m idxsf_2 idxpe_2 idxpc_2 idxmpc_2
## 1 1.0 0.6636159 2.400990e-14 1.0000000 1.00000000
## 2 1.2 0.6644244 1.136448e-02 0.9963459 0.99512790
## 3 1.4 0.6737516 1.389482e-01 0.9401275 0.92017005
## 4 1.6 0.6886347 3.621894e-01 0.8295895 0.77278600
## 5 1.8 0.7016385 5.841946e-01 0.7130106 0.61734747
## 6 2.0 0.7105211 7.693229e-01 0.6106878 0.48091706
## 7 2.2 0.7161646 9.143268e-01 0.5272310 0.36964130
## 8 2.4 0.7195712 1.025038e+00 0.4618607 0.28248091
## 9 2.6 0.5450682 1.137482e+00 0.3898330 0.18644404
## 10 2.8 0.5480200 1.184082e+00 0.3633487 0.15113156
## 11 3.0 0.5512023 1.219654e+00 0.3431285 0.12417132
## 12 3.2 0.5546791 1.247273e+00 0.3274450 0.10326001
## 13 3.4 0.5585833 1.269035e+00 0.3151071 0.08680943
## 14 3.6 0.5631297 1.286417e+00 0.3052696 0.07369279
## 15 3.8 0.5686994 1.300483e+00 0.2973231 0.06309741
## 16 4.0 0.5761823 1.312010e+00 0.2908221 0.05442946
skim_without_charts(data = results)
Name | results |
Number of rows | 16 |
Number of columns | 5 |
_______________________ | |
Column type frequency: | |
numeric | 5 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 |
---|---|---|---|---|---|---|---|---|---|
m | 0 | 1 | 2.50 | 0.95 | 1.00 | 1.75 | 2.50 | 3.25 | 4.00 |
idxsf_2 | 0 | 1 | 0.63 | 0.07 | 0.55 | 0.56 | 0.62 | 0.69 | 0.72 |
idxpe_2 | 0 | 1 | 0.86 | 0.49 | 0.00 | 0.53 | 1.08 | 1.25 | 1.31 |
idxpc_2 | 0 | 1 | 0.54 | 0.27 | 0.29 | 0.32 | 0.43 | 0.74 | 1.00 |
idxmpc_2 | 0 | 1 | 0.39 | 0.36 | 0.05 | 0.10 | 0.23 | 0.66 | 1.00 |
#### Creating new variables for evaluation
# z-score standardisation (despite the name, this centres and scales; it is not min-max normalisation)
normalize <- function(x) {
  return((x - mean(x)) / sd(x))
}
# Standardise all indices
results$idxsf_2_norm <- normalize(results$idxsf_2)
results$idxpe_2_norm <- normalize(-results$idxpe_2)  # sign flipped because smaller PE is better
results$idxpc_2_norm <- normalize(results$idxpc_2)
results$idxmpc_2_norm <- normalize(results$idxmpc_2)
# Combined score: the mean of the four standardised indices
results$combined_score <- rowMeans(results[, c("idxsf_2_norm", "idxpe_2_norm", "idxpc_2_norm", "idxmpc_2_norm")])
# Weighted version, e.g. giving a higher weight to the fuzzy silhouette (SIL.F)
results$combined_score_weighted <- (0.4 * results$idxsf_2_norm) +
  (0.2 * results$idxpe_2_norm) +
  (0.2 * results$idxpc_2_norm) +
  (0.2 * results$idxmpc_2_norm)
results %>% arrange(desc(combined_score_weighted))
## m idxsf_2 idxpe_2 idxpc_2 idxmpc_2 idxsf_2_norm idxpe_2_norm
## 1 1.0 0.6636159 2.400990e-14 1.0000000 1.00000000 0.5376437 1.7678143
## 2 1.2 0.6644244 1.136448e-02 0.9963459 0.99512790 0.5489712 1.7444566
## 3 1.4 0.6737516 1.389482e-01 0.9401275 0.92017005 0.6796542 1.4822304
## 4 1.6 0.6886347 3.621894e-01 0.8295895 0.77278600 0.8881801 1.0233969
## 5 1.8 0.7016385 5.841946e-01 0.7130106 0.61734747 1.0703763 0.5671039
## 6 2.0 0.7105211 7.693229e-01 0.6106878 0.48091706 1.1948299 0.1866047
## 7 2.2 0.7161646 9.143268e-01 0.5272310 0.36964130 1.2739020 -0.1114254
## 8 2.4 0.7195712 1.025038e+00 0.4618607 0.28248091 1.3216309 -0.3389723
## 9 2.6 0.5450682 1.137482e+00 0.3898330 0.18644404 -1.1233258 -0.5700824
## 10 2.8 0.5480200 1.184082e+00 0.3633487 0.15113156 -1.0819673 -0.6658614
## 11 4.0 0.5761823 1.312010e+00 0.2908221 0.05442946 -0.6873863 -0.9287940
## 12 3.0 0.5512023 1.219654e+00 0.3431285 0.12417132 -1.0373806 -0.7389716
## 13 3.8 0.5686994 1.300483e+00 0.2973231 0.06309741 -0.7922297 -0.9051018
## 14 3.2 0.5546791 1.247273e+00 0.3274450 0.10326001 -0.9886668 -0.7957389
## 15 3.6 0.5631297 1.286417e+00 0.3052696 0.07369279 -0.8702664 -0.8761922
## 16 3.4 0.5585833 1.269035e+00 0.3151071 0.08680943 -0.9339655 -0.8404669
## idxpc_2_norm idxmpc_2_norm combined_score combined_score_weighted
## 1 1.70642477 1.70642477 1.42957689 1.2511903
## 2 1.69273726 1.69273726 1.41972557 1.2455747
## 3 1.48215328 1.48215328 1.28154780 1.1611691
## 4 1.06809769 1.06809769 1.01194310 0.9871905
## 5 0.63141410 0.63141410 0.72507711 0.7941369
## 6 0.24813126 0.24813126 0.46942429 0.6145054
## 7 -0.06448299 -0.06448299 0.25837766 0.4614825
## 8 -0.30934837 -0.30934837 0.09099045 0.3371185
## 9 -0.57915099 -0.57915099 -0.71292753 -0.7950072
## 10 -0.67835663 -0.67835663 -0.77613549 -0.8373019
## 11 -0.95002815 -0.95002815 -0.87905916 -0.8407246
## 12 -0.75409779 -0.75409779 -0.82113694 -0.8643857
## 13 -0.92567671 -0.92567671 -0.88717122 -0.8681829
## 14 -0.81284531 -0.81284531 -0.85252408 -0.8797526
## 15 -0.89591041 -0.89591041 -0.88456985 -0.8817092
## 16 -0.85906101 -0.85906101 -0.87313861 -0.8853040
require(psych)
# Correlation heatmap (Spearman) of m and the validity indices
plot_correlation(data = results[c(-7:-12)],
type = "all",
cor_args = list(method="spearman"),
ggtheme = theme_classic(),
theme_config = list(legend.position = "none",
axis.text.x=element_text(angle = 90)))
# Find the row with the highest weighted combined score
best_combination <- results[which.max(results$combined_score_weighted), ]
print(best_combination)
## m idxsf_2 idxpe_2 idxpc_2 idxmpc_2 idxsf_2_norm idxpe_2_norm
## 1 1 0.6636159 2.40099e-14 1 1 0.5376437 1.767814
## idxpc_2_norm idxmpc_2_norm combined_score combined_score_weighted
## 1 1.706425 1.706425 1.429577 1.25119
Fuzzy c-means with 4 clusters and fuzzifier m = 1 achieves the best combined score. Since m = 1 corresponds to a crisp (non-fuzzy) partition, the next value on the grid, m = 1.2, is used below for visualisation and interpretation.
f_cmeans_fix <- fcm(data_for_clustering, centers=4, m = 1.2, nstart = 10)
res.fcm_fix <- ppclust2(f_cmeans_fix, "fclust")
idxsf_fix <- SIL.F(res.fcm_fix$Xca, res.fcm_fix$U, alpha=1)
idxpe_fix <- PE(res.fcm_fix$U)
idxpc_fix <- PC(res.fcm_fix$U)
idxmpc_fix <- MPC(res.fcm_fix$U)
data.frame("Partition Entropy" = c(idxpe_fix),
"Partition Coefficient" = c(idxpc_fix),
"Modified Partition Coefficient" = c(idxmpc_fix),
"Fuzzy Silhouette Index" = c(idxsf_fix))
## Partition.Entropy Partition.Coefficient Modified.Partition.Coefficient
## 1 0.01136448 0.9963459 0.9951279
## Fuzzy.Silhouette.Index
## 1 0.6644244
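With m = 1.2 the memberships are nearly crisp; a quick check (sketch):
# Sketch: highest membership degree of each observation under m = 1.2
summary(apply(f_cmeans_fix$u, 1, max))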
# Load ggplot2 package
library(ggplot2)
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.3.3
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## The following object is masked from 'package:lubridate':
##
## stamp
# Scatterplots of each validity index against the fuzzifier m
# Plot 1: idxsf_2 vs m
plot1 <- ggplot(results, aes(x = m, y = idxsf_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Silhouette Index vs m",
x = "Derajat Fuzzy", y = "Silhouette Index") +
theme_cowplot()
# Plot 2: idxpe_2 vs m
plot2 <- ggplot(results, aes(x = m, y = idxpe_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Partition Entropy vs m",
x = "Derajat Fuzzy", y = "Partition Entropy") +
theme_cowplot()
# Plot 3: idxpc_2 vs m
plot3 <- ggplot(results, aes(x = m, y = idxpc_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Partition Coefficient vs m",
x = "Derajat Fuzzy", y = "Partition Coefficient") +
theme_cowplot()
# Plot 4: idxmpc_2 vs m
plot4 <- ggplot(results, aes(x = m, y = idxmpc_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Modified Partition Coefficient vs m",
x = "Derajat Fuzzy", y = "Modified Partition Coefficient") +
theme_cowplot()
# Display the plots
print(plot1)
print(plot2)
print(plot3)
print(plot4)
# Plot 5: combined_score vs m
plot5 <- ggplot(results, aes(x = m, y = combined_score)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Combined Score vs m",
x = "Derajat Fuzzy", y = "Combined Score") +
theme_classic()
plot5
ggsave("plot1.png", plot = plot1, width = 10, height = 8, dpi = 300)
ggsave("plot2.png", plot = plot2, width = 10, height = 8, dpi = 300)
ggsave("plot3.png", plot = plot3, width = 10, height = 8, dpi = 300)
ggsave("plot4.png", plot = plot4, width = 10, height = 8, dpi = 300)
ggsave("plot5.png", plot = plot5, width = 10, height = 8, dpi = 300)
plotcluster(f_cmeans_fix, cp=1, trans=TRUE)
res.fcm_fix2 <- ppclust2(f_cmeans_fix, "kmeans")
fviz_cluster(res.fcm_fix2, data = data_for_clustering,
ellipse.type = "convex",
palette = "jco",
repel = TRUE)
f_cmeans_fix3 <- ppclust2(f_cmeans_fix, "fanny")
cluster::clusplot(scale(data_for_clustering), f_cmeans_fix3$cluster,
main = "Cluster plot of anemia data set",
color=TRUE, labels = 2, lines = 2, cex=1)
sil2 <- silhouette(x = f_cmeans_fix$cluster,
dist = dist(x =data_for_clustering ,method = "euclidean"))
silhouette <- fviz_silhouette(sil2,print.summary = FALSE)+
scale_color_manual(values = c("black","black","black", "black"))+
# get_palette comes from the ggpubr package
scale_fill_manual(values = get_palette("jco",k=4))+
theme_classic()+
theme(legend.position = "top")
ggsave("Silhouette.png", plot = silhouette, width = 10, height = 8, dpi = 300)
No observations are misclustered, since none has a negative silhouette width.
sil2 |>
as.data.frame() |>
mutate(obs=1:nrow(sil2)) |>
relocate(obs) |>
filter(sil_width<0) |>
arrange(sil_width)
## [1] obs cluster neighbor sil_width
## <0 rows> (or 0-length row.names)
corrected_cluster2 <- sil2 |>
as.data.frame() |>
mutate(cluster=if_else(sil_width<0,neighbor,cluster))
library(Rtsne)
## Warning: package 'Rtsne' was built under R version 4.3.3
tic()
set.seed(123)
rtsne_cc2 <- Rtsne(data, dims = 2,perplexity = 5)
toc()
## 0.05 sec elapsed
tsne_df2 <- rtsne_cc2$Y %>%
as.data.frame() %>%
rename(
tsne1 = "V1",
tsne2 = "V2"
) |>
mutate(cluster= as.character(f_cmeans_fix$cluster),
corrected_cluster=as.character(corrected_cluster2$cluster)) %>%
mutate(Kabupaten=data$R102)
ggscatter(tsne_df2, x = "tsne1",
y = "tsne2",
label = "Kabupaten",
# repel = TRUE increases running time when there are very many observations
repel = TRUE # so the district labels do not overlap
,
color = "cluster",
palette = "jco",
title = "t-SNE cluster plot of the FCM solution")
t-SNE visualisation of the cluster assignments.
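Note that the Rtsne call above embeds the full data frame, including the R102 district code. A sketch that embeds only the seven indicators (the object name rtsne_vars is ours):
# Sketch: t-SNE on the seven indicators only, excluding the R102 identifier
set.seed(123)
rtsne_vars <- Rtsne(data_for_clustering, dims = 2, perplexity = 5)
# the plotting code above can be reused with rtsne_vars$Y in place of rtsne_cc2$Y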
data_for_clustering |>
mutate(cluster=f_cmeans_fix$cluster) |>
group_by(cluster) |>
summarise(across(everything(),mean))
## # A tibble: 4 × 8
## cluster x1 x2 x3 x4 x5 x6 x7
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 92.3 92.2 0.0633 33.7 60.8 73.9 3.84
## 2 2 90.9 93.0 0.0588 79.0 88.5 86.6 3.62
## 3 3 92.1 95.5 0.0492 51.9 86.1 79.8 3.68
## 4 4 95.1 93.9 0.0467 65.6 52.7 86.3 3.90
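To see which kabupaten/kota fall in each cluster by name, a sketch using data_kota defined earlier:
# Sketch: list the district/city names belonging to each crisp cluster
split(data_kota$kota, f_cmeans_fix$cluster)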
X1: percentage of exclusively breastfed infants; X2: percentage of children receiving complementary feeding (MP-ASI); X3: percentage of children with complete immunisation; X4: percentage of children with health insurance; X5: percentage with access to adequate sanitation; X6: percentage with access to safe drinking water; X7: percentage of household nutrient consumption.
Annisa's suggested cluster labels:
Cluster 1: adequate basic healthcare; Cluster 2: high health risk; Cluster 3: good healthcare access but low exclusive breastfeeding; Cluster 4: emphasis on complementary feeding (MP-ASI) with low healthcare access.
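These descriptions can be attached to the data as labels for later tables and maps. A sketch (the object names are ours; whether each label matches the final cluster numbering should be checked against the cluster means above):
# Sketch: map cluster numbers to the suggested descriptive labels
cluster_labels <- c("1" = "Adequate basic healthcare",
                    "2" = "High health risk",
                    "3" = "Good healthcare access, low exclusive breastfeeding",
                    "4" = "Complementary feeding focus, low healthcare access")
data_cluster_labelled <- data.frame(kota = data_kota$kota,
                                    cluster = f_cmeans_fix$cluster,
                                    label = cluster_labels[as.character(f_cmeans_fix$cluster)])
head(data_cluster_labelled)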
data_cluster <- data
data_cluster$cluster <- c(f_cmeans_fix$cluster)
# Correlation heatmap (Spearman), now including the cluster assignment
plot_correlation(data = data_cluster[-1],
type = "all",
cor_args = list(method="spearman"),
ggtheme = theme_classic(),
theme_config = list(legend.position = "none",
axis.text.x=element_text(angle = 90)))
library(dplyr)
data_for_clustering |>
mutate(cluster=f_cmeans_fix$cluster) |>
group_by(cluster) |>
summarise(across(everything(),mean)) |>
arrange(desc(x7))
## # A tibble: 4 × 8
## cluster x1 x2 x3 x4 x5 x6 x7
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 95.1 93.9 0.0467 65.6 52.7 86.3 3.90
## 2 1 92.3 92.2 0.0633 33.7 60.8 73.9 3.84
## 3 3 92.1 95.5 0.0492 51.9 86.1 79.8 3.68
## 4 2 90.9 93.0 0.0588 79.0 88.5 86.6 3.62
data_for_clustering |>
mutate(cluster=f_cmeans_fix$cluster) |>
group_by(cluster) |>
summarise(across(everything(),sum))
## # A tibble: 4 × 8
## cluster x1 x2 x3 x4 x5 x6 x7
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 277. 277. 0.19 101. 182. 222. 11.5
## 2 2 727. 744. 0.47 632. 708. 693. 29.0
## 3 3 1197. 1241. 0.64 675. 1120. 1037. 47.8
## 4 4 285. 282. 0.14 197. 158. 259. 11.7
f_cmeans_fix
## $u
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 9.789946e-05 8.772709e-04 9.983994e-01 6.254218e-04
## 2 2.858063e-04 2.793190e-05 9.996720e-01 1.422355e-05
## 3 9.999765e-01 3.201489e-08 2.294834e-05 4.879820e-07
## 4 3.563596e-03 1.156217e-04 9.958544e-01 4.664176e-04
## 5 9.999945e-01 1.384224e-08 1.755966e-06 3.724079e-06
## 6 9.999998e-01 1.229097e-09 1.096418e-07 1.184759e-07
## 7 4.870223e-06 2.472285e-06 9.999917e-01 9.613304e-07
## 8 4.332586e-07 6.007304e-05 9.999392e-01 3.229373e-07
## 9 1.250283e-06 2.821751e-04 9.997149e-01 1.637070e-06
## 10 1.392111e-04 1.080750e-05 9.998403e-01 9.652632e-06
## 11 1.916028e-05 3.169220e-03 9.967959e-01 1.574779e-05
## 12 1.393972e-05 1.963055e-05 9.999654e-01 1.002003e-06
## 13 3.689103e-05 6.167035e-06 9.999559e-01 1.000516e-06
## 14 4.978894e-05 9.893638e-03 9.900254e-01 3.114943e-05
## 15 3.920929e-05 2.824535e-03 9.970468e-01 8.946920e-05
## 16 7.208105e-10 9.999997e-01 3.298154e-07 1.872132e-08
## 17 3.606909e-05 2.372780e-06 9.999607e-01 8.220383e-07
## 18 5.009431e-06 9.835843e-01 1.634203e-02 6.870594e-05
## 19 3.920701e-06 9.974987e-01 3.657127e-04 2.131685e-03
## 20 5.774673e-08 1.182002e-08 1.585602e-08 9.999999e-01
## 21 2.218388e-07 1.547941e-06 5.352742e-07 9.999977e-01
## 22 1.931842e-08 9.999943e-01 5.236320e-06 4.601079e-07
## 23 1.252774e-07 9.999324e-01 6.632882e-05 1.124285e-06
## 24 8.735545e-08 9.999300e-01 6.901695e-05 9.202931e-07
## 25 1.696532e-05 9.921733e-01 4.693065e-03 3.116621e-03
## 26 2.010210e-05 7.457159e-07 6.917615e-06 9.999722e-01
## 27 5.827956e-12 1.000000e+00 2.728804e-09 1.871638e-10
##
## $v
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 92.30961 92.21554 0.06332723 33.73207 60.82628 73.85286 3.843237
## Cluster 2 90.88895 93.01076 0.05889572 79.03480 88.53817 86.56778 3.621260
## Cluster 3 92.11427 95.49340 0.04907357 51.88424 86.13247 79.79236 3.679420
## Cluster 4 95.10359 93.93061 0.04669033 65.57987 52.69128 86.27682 3.896530
##
## $v0
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## Cluster 2 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## Cluster 3 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## Cluster 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
##
## $d
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 1007.98470 650.10463 159.12893 695.62533
## 2 750.44667 1194.85197 146.74479 1367.51656
## 3 87.97669 2775.20578 745.10136 1609.47720
## 4 536.64749 1065.26809 173.95128 805.95688
## 5 78.06845 2912.28438 1105.53331 951.19879
## 6 57.69870 3493.40328 1422.88823 1401.00587
## 7 894.56107 1024.47194 77.46767 1237.50008
## 8 1498.44311 558.79662 79.98250 1589.15428
## 9 1556.14746 526.43975 102.67795 1474.48025
## 10 757.80408 1263.41698 128.32332 1292.29773
## 11 1747.64209 629.13286 199.16435 1817.56066
## 12 1388.22476 1296.35540 148.35889 2350.37282
## 13 999.17761 1428.93204 129.72709 2055.80553
## 14 1877.13635 651.40515 259.29439 2061.73277
## 15 1250.45585 531.56547 164.43833 1060.25773
## 16 3310.86454 49.14798 972.41454 1725.99027
## 17 818.61142 1410.76071 105.80550 1743.91949
## 18 2107.94837 184.18546 417.98091 1248.58695
## 19 2682.19062 222.52557 1082.76540 761.05802
## 20 1480.18938 2032.82102 1916.83145 52.79864
## 21 1706.93561 1157.37419 1431.22985 79.69386
## 22 4143.71372 118.73659 1351.38422 2197.92134
## 23 3782.69573 157.53673 1079.03489 2438.93727
## 24 3532.63532 136.88721 930.17711 2205.82761
## 25 2072.21948 230.69099 673.06472 730.48505
## 26 770.44180 1488.92710 953.66719 88.59119
## 27 3342.08777 18.92871 977.07219 1669.84717
##
## $x
## x1 x2 x3 x4 x5 x6 x7
## 1 96.52 98.63 0.07 57.7483 77.28 83.93 3.60
## 2 92.39 96.83 0.03 50.5455 81.35 68.83 3.89
## 3 93.33 90.12 0.09 31.6522 69.67 73.93 3.86
## 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
## 5 92.59 98.39 0.03 36.6951 56.78 77.68 4.10
## 6 91.01 88.14 0.07 32.8277 56.01 69.94 3.57
## 7 97.59 97.96 0.00 48.6842 83.12 84.47 3.23
## 8 95.52 92.11 0.04 57.5531 90.85 78.22 3.41
## 9 98.72 96.00 0.00 58.5742 89.78 80.58 4.00
## 10 92.86 100.00 0.00 44.9799 81.43 85.93 3.72
## 11 85.07 91.67 0.04 55.1300 92.64 88.85 3.66
## 12 91.07 96.15 0.03 44.4327 95.67 80.38 3.72
## 13 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
## 15 89.47 96.77 0.09 61.7424 81.73 73.53 3.86
## 16 88.79 95.59 0.03 82.6220 89.73 81.69 3.71
## 17 91.75 96.23 0.03 41.9298 86.62 82.19 3.45
## 18 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## 19 96.92 92.31 0.04 81.3657 75.15 87.52 3.90
## 20 95.00 94.29 0.00 66.0589 45.59 87.69 3.94
## 21 96.88 92.50 0.06 73.2582 55.68 83.71 3.78
## 22 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## 23 88.61 92.16 0.05 82.0101 98.30 79.68 3.80
## 24 94.44 89.13 0.03 76.8707 97.78 90.94 3.55
## 25 84.91 96.67 0.12 73.5336 76.25 87.10 3.56
## 26 93.44 95.00 0.08 57.4074 56.76 87.43 3.97
## 27 93.02 91.30 0.00 81.3433 90.18 88.42 3.58
##
## $cluster
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 3 3 1 3 1 1 3 3 3 3 3 3 3 3 3 2 3 2 2 4 4 2 2 2 2 4
## 27
## 2
##
## $csize
## 1 2 3 4
## 3 8 13 3
##
## $sumsqrs
## $sumsqrs$between.ss
## [1] 10887.16
##
## $sumsqrs$within.ss
## 1 2 3 4
## 223.7435 1118.6255 1875.0622 221.0829
##
## $sumsqrs$tot.within.ss
## [1] 3438.514
##
## $sumsqrs$tot.ss
## [1] 14325.68
##
##
## $k
## [1] 4
##
## $m
## [1] 1.2
##
## $iter
## [1] 11 63 10 76 57 9 78 10 75 21
##
## $best.start
## [1] 1
##
## $func.val
## [1] 3436.481 4538.821 3436.481 4538.821 4538.821 3436.481 4538.821 3436.481
## [9] 4538.821 3436.481
##
## $comp.time
## [1] 0.03 0.20 0.04 0.19 0.12 0.02 0.19 0.02 0.19 0.05
##
## $inpargs
## $inpargs$iter.max
## [1] 1000
##
## $inpargs$con.val
## [1] 1e-09
##
## $inpargs$dmetric
## [1] "sqeuclidean"
##
## $inpargs$alginitv
## [1] "kmpp"
##
## $inpargs$alginitu
## [1] "imembrand"
##
## $inpargs$fixcent
## [1] FALSE
##
## $inpargs$fixmemb
## [1] FALSE
##
## $inpargs$stand
## [1] FALSE
##
##
## $algorithm
## [1] "FCM"
##
## $call
## fcm(x = data_for_clustering, centers = 4, m = 1.2, nstart = 10)
##
## attr(,"class")
## [1] "ppclust"
summary(f_cmeans_fix)
## Summary for 'f_cmeans_fix'
##
## Number of data objects: 27
##
## Number of clusters: 4
##
## Crisp clustering vector:
## [1] 3 3 1 3 1 1 3 3 3 3 3 3 3 3 3 2 3 2 2 4 4 2 2 2 2 4 2
##
## Initial cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## Cluster 2 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## Cluster 3 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## Cluster 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
##
## Final cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 92.30961 92.21554 0.06332723 33.73207 60.82628 73.85286 3.843237
## Cluster 2 90.88895 93.01076 0.05889573 79.03480 88.53817 86.56778 3.621260
## Cluster 3 92.11427 95.49340 0.04907357 51.88424 86.13247 79.79236 3.679420
## Cluster 4 95.10359 93.93061 0.04669033 65.57987 52.69128 86.27682 3.896530
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3
## Cluster 2 2984.6558
## Cluster 3 1015.9917 796.5153
## Cluster 4 1245.5660 1484.8049 1359.3573
##
## Difference between the initial and final cluster prototypes
## x1 x2 x3 x4 x5 x6
## Cluster 1 4.809615 -4.214456 -0.006672769 -8.59593 -29.273721 -5.237136
## Cluster 2 5.178952 2.100765 0.058895725 11.54360 2.228171 -3.812216
## Cluster 3 -2.475733 -0.506595 -0.150926430 -35.00386 -8.257531 -7.177640
## Cluster 4 3.753587 5.200605 -0.003309668 15.42557 -22.348723 5.226821
## x7
## Cluster 1 0.123236792
## Cluster 2 0.441260011
## Cluster 3 -0.000580016
## Cluster 4 0.296529676
##
## Root Mean Squared Deviations (RMSD): 28.89941
## Mean Absolute Deviation (MAD): 321.0421
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.000097899 0.000877271 0.998399408 0.000625422
## 2 0.000285806 0.000027932 0.999672038 0.000014224
## 3 0.999976532 0.000000032 0.000022948 0.000000488
## 4 0.003563596 0.000115622 0.995854365 0.000466418
## 5 0.999994506 0.000000014 0.000001756 0.000003724
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 23 1.2500e-07 0.999932422 0.000066329 0.000001124
## 24 8.7000e-08 0.999929975 0.000069017 0.000000920
## 25 1.6965e-05 0.992173348 0.004693065 0.003116621
## 26 2.0102e-05 0.000000746 0.000006918 0.999972235
## 27 0.0000e+00 0.999999997 0.000000003 0.000000000
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 3 0.9999765 0.9999855 0.9999903 0.9999945 0.9999971 0.9999998
## Cluster 2 8 0.9835843 0.9961673 0.9966391 0.9999312 0.9999956 1.0000000
## Cluster 3 13 0.9900254 0.9970468 0.9982432 0.9997149 0.9999559 0.9999917
## Cluster 4 3 0.9999722 0.9999850 0.9999899 0.9999977 0.9999988 0.9999999
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.9963459 0.9951279
##
## Within cluster sum of squares by cluster:
## 1 2 3 4
## 223.7435 1118.6255 1875.0622 221.0829
## (between_SS / total_SS = 76%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Load the libraries
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
data_cluster
## R102 x1 x2 x3 x4 x5 x6 x7 cluster
## 1 1 96.52 98.63 0.07 57.7483 77.28 83.93 3.60 3
## 2 2 92.39 96.83 0.03 50.5455 81.35 68.83 3.89 3
## 3 3 93.33 90.12 0.09 31.6522 69.67 73.93 3.86 1
## 4 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60 3
## 5 5 92.59 98.39 0.03 36.6951 56.78 77.68 4.10 1
## 6 6 91.01 88.14 0.07 32.8277 56.01 69.94 3.57 1
## 7 7 97.59 97.96 0.00 48.6842 83.12 84.47 3.23 3
## 8 8 95.52 92.11 0.04 57.5531 90.85 78.22 3.41 3
## 9 9 98.72 96.00 0.00 58.5742 89.78 80.58 4.00 3
## 10 10 92.86 100.00 0.00 44.9799 81.43 85.93 3.72 3
## 11 11 85.07 91.67 0.04 55.1300 92.64 88.85 3.66 3
## 12 12 91.07 96.15 0.03 44.4327 95.67 80.38 3.72 3
## 13 13 87.50 96.43 0.07 42.3280 90.10 79.09 3.72 3
## 14 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98 3
## 15 15 89.47 96.77 0.09 61.7424 81.73 73.53 3.86 3
## 16 16 88.79 95.59 0.03 82.6220 89.73 81.69 3.71 2
## 17 17 91.75 96.23 0.03 41.9298 86.62 82.19 3.45 3
## 18 18 85.71 90.91 0.00 67.4912 86.31 90.38 3.18 2
## 19 71 96.92 92.31 0.04 81.3657 75.15 87.52 3.90 2
## 20 72 95.00 94.29 0.00 66.0589 45.59 87.69 3.94 4
## 21 73 96.88 92.50 0.06 73.2582 55.68 83.71 3.78 4
## 22 74 94.59 96.00 0.20 86.8881 94.39 86.97 3.68 2
## 23 75 88.61 92.16 0.05 82.0101 98.30 79.68 3.80 2
## 24 76 94.44 89.13 0.03 76.8707 97.78 90.94 3.55 2
## 25 77 84.91 96.67 0.12 73.5336 76.25 87.10 3.56 2
## 26 78 93.44 95.00 0.08 57.4074 56.76 87.43 3.97 4
## 27 79 93.02 91.30 0.00 81.3433 90.18 88.42 3.58 2
# Load the CSV data
data_clustered <- read.csv("Summary_Cluster tableau.csv", sep = ";")
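# Note: the next line assumes the rows of the CSV are in the same order as data_cluster (one row per kabupaten/kota)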
data_clustered$Cluster <- data_cluster$cluster
# Load shapefile of the regions (replace with the path to your shapefile)
shapefile <- st_read("RBI_50K_2023_Jawa Barat.shp")
## Reading layer `RBI_50K_2023_Jawa Barat' from data source
## `C:\Users\Administrator\Documents\kuliah\Akademik\Kampus\Kuliah\S2-Semester 3\Pemodelan Klasifikasi\RBI_50K_2023_Jawa Barat.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY, XYZ
## Bounding box: xmin: 106.3703 ymin: -7.82099 xmax: 108.8468 ymax: -5.806538
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
# Join the shapefile with the cluster data
data_merged <- shapefile %>%
left_join(data_clustered, by = c("NAMOBJ" = "Kabupaten.kota"))
# Inspect the join result
head(data_merged)
## Simple feature collection with 6 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension: XYZ
## Bounding box: xmin: 106.4012 ymin: -7.574692 xmax: 108.7234 ymax: -5.918425
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
## NAMOBJ FCODE REMARK METADATA SRS_ID KDBBPS
## 1 Bandung BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 2 Bandung Barat BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 3 Bekasi BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 4 Bogor BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 5 Ciamis BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 6 Cianjur BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## KDCBPS KDCPUM KDEBPS KDEPUM KDPBPS KDPKAB KDPPUM LUASWH TIPADM WADMKC
## 1 <NA> <NA> <NA> <NA> <NA> 32.04 32 1740.843 4 <NA>
## 2 <NA> <NA> <NA> <NA> <NA> 32.17 32 1283.439 4 <NA>
## 3 <NA> <NA> <NA> <NA> <NA> 32.16 32 1251.054 4 <NA>
## 4 <NA> <NA> <NA> <NA> <NA> 32.01 32 2991.778 4 <NA>
## 5 <NA> <NA> <NA> <NA> <NA> 32.07 32 1595.937 4 <NA>
## 6 <NA> <NA> <NA> <NA> <NA> 32.03 32 3631.915 4 <NA>
## WADMKD WADMKK WADMPR WIADKC WIADKK WIADPR WIADKD SHAPE_Leng
## 1 <NA> Bandung Jawa Barat <NA> <NA> <NA> 0 3.242820
## 2 <NA> Bandung Barat Jawa Barat <NA> Bandung <NA> 0 2.564628
## 3 <NA> Bekasi Jawa Barat <NA> <NA> <NA> 0 3.135190
## 4 <NA> Bogor Jawa Barat <NA> <NA> <NA> 0 4.526551
## 5 <NA> Ciamis Jawa Barat <NA> <NA> <NA> 0 3.004328
## 6 <NA> Cianjur Jawa Barat <NA> <NA> <NA> 0 4.398638
## SHAPE_Area R102 Cluster Latitude Longitude geometry
## 1 0.1424909 32.04 3 -6.914744 107.6098 MULTIPOLYGON Z (((107.9147 ...
## 2 0.1050062 32.17 3 -6.854496 107.4726 MULTIPOLYGON Z (((107.5938 ...
## 3 0.1022227 32.16 2 -6.236853 107.0055 MULTIPOLYGON Z (((107.0159 ...
## 4 0.2446141 32.01 3 -6.595038 106.8166 MULTIPOLYGON Z (((106.991 -...
## 5 0.1306842 32.07 3 -7.332494 108.3538 MULTIPOLYGON Z (((108.3857 ...
## 6 0.2973014 32.03 1 -6.820958 107.1424 MULTIPOLYGON Z (((107.3021 ...
data_merged <- st_make_valid(data_merged)
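A quick sanity check (sketch) that every district in the shapefile received a cluster label after the join:
# Sketch: count districts whose NAMOBJ did not match any Kabupaten.kota in the cluster data
sum(is.na(data_merged$Cluster))
# data_merged$NAMOBJ[is.na(data_merged$Cluster)]  # inspect any unmatched names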
# Colours for each cluster
custom_colors <- c("1" = "#3d3e9f", "2" = "#5c5fd4", "3" = "#8a8aff", "4" = "#b7b7ff")
# Plot with manually specified colours
tes <- ggplot(data_merged) +
geom_sf(aes(fill = as.factor(Cluster))) +
scale_fill_manual(values = custom_colors) +
labs(title = "Clustering of Kabupaten/Kota in Jawa Barat",
fill = "Cluster") +
theme_cowplot()
library(sf)
library(ggplot2)
library(dplyr)
# The shapefile has already been read and joined with the cluster data
# data_merged is the result of joining the shapefile with the cluster assignments
ggplot(data_merged) +
geom_sf(aes(fill = as.factor(Cluster))) +
scale_fill_manual(values = custom_colors) +
labs(title = "Clustering of Kabupaten/Kota in Jawa Barat",
fill = "Cluster") +
theme_cowplot()
ggsave("peta_jawa_barat_kluster_custom_colors.png",tes, width = 10, height = 8)