if (!require(fclust)) {
install.packages("fclust")
}
## Loading required package: fclust
## Warning: package 'fclust' was built under R version 4.3.3
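As an aside, a compact sketch (not part of the original run) that installs any missing packages used later in this document before loading them:
# Sketch: install any packages that are missing, then load everything in one pass
pkgs <- c("fclust", "e1071", "cluster", "frbs", "kernlab", "tidyverse",
          "factoextra", "DataExplorer", "ggpubr", "tictoc", "skimr", "psych",
          "ppclust", "cowplot", "Rtsne", "sf")
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) install.packages(missing_pkgs)
invisible(lapply(pkgs, library, character.only = TRUE))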
library(fclust)
library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
## Registered S3 method overwritten by 'e1071':
## method from
## print.fclust fclust
library(cluster)
## Warning: package 'cluster' was built under R version 4.3.3
library(frbs)
## Warning: package 'frbs' was built under R version 4.3.3
library("kernlab")
## Warning: package 'kernlab' was built under R version 4.3.3
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tibble' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::alpha() masks kernlab::alpha()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("factoextra")
## Warning: package 'factoextra' was built under R version 4.3.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library("cluster")
library("DataExplorer")
## Warning: package 'DataExplorer' was built under R version 4.3.2
library("ggpubr")
## Warning: package 'ggpubr' was built under R version 4.3.3
library("tictoc")
## Warning: package 'tictoc' was built under R version 4.3.3
##
## Attaching package: 'tictoc'
##
## The following object is masked from 'package:kernlab':
##
## size
# Read all CSV files
x1 <- read.csv("x1_asi eksklusif (2).csv")
x2 <- read.csv("x2_persentase pendamping.csv")
x3 <- read.csv("x3_Persentase Imunisasi.csv")
x4 <- read.csv("x4_Jaminan.csv")
x5 <- read.csv("x5_sanitasi layak.csv")
x6 <- read.csv("x6_persentase air minum layak.csv")
x7 <- read.csv("x7_Gizi Percentage mean.csv")
kota <- read.csv("Summary_Cluster tableau.csv", sep = ";")
data <- data.frame("R102" = x1$R102, "x1" = round(x1$persentase_asi_eksklusif,2),
"x2" = round(x2$persentase_pendamping,2),
"x3" = round(x3$persentase_lengkap,2) ,
"x4" = round(x4$persentase_memiliki_jaminan,4),
"x5" = round(x5$Persentase,2),
"x6" = round(x6$Persentase,2),
"x7" = x7$Gizi.Percentage)
data_kota <- data.frame("R102" = x1$R102, "x1" = round(x1$persentase_asi_eksklusif,2),
"x2" = round(x2$persentase_pendamping,2),
"x3" = round(x3$persentase_lengkap,2) ,
"x4" = round(x4$persentase_memiliki_jaminan,4),
"x5" = round(x5$Persentase,2),
"x6" = round(x6$Persentase,2),
"x7" = x7$Gizi.Percentage,
"kota" = kota$Kabupaten.kota)
data
## R102 x1 x2 x3 x4 x5 x6 x7
## 1 1 96.52 98.63 0.07 57.7483 77.28 83.93 3.60
## 2 2 92.39 96.83 0.03 50.5455 81.35 68.83 3.89
## 3 3 93.33 90.12 0.09 31.6522 69.67 73.93 3.86
## 4 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
## 5 5 92.59 98.39 0.03 36.6951 56.78 77.68 4.10
## 6 6 91.01 88.14 0.07 32.8277 56.01 69.94 3.57
## 7 7 97.59 97.96 0.00 48.6842 83.12 84.47 3.23
## 8 8 95.52 92.11 0.04 57.5531 90.85 78.22 3.41
## 9 9 98.72 96.00 0.00 58.5742 89.78 80.58 4.00
## 10 10 92.86 100.00 0.00 44.9799 81.43 85.93 3.72
## 11 11 85.07 91.67 0.04 55.1300 92.64 88.85 3.66
## 12 12 91.07 96.15 0.03 44.4327 95.67 80.38 3.72
## 13 13 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## 14 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
## 15 15 89.47 96.77 0.09 61.7424 81.73 73.53 3.86
## 16 16 88.79 95.59 0.03 82.6220 89.73 81.69 3.71
## 17 17 91.75 96.23 0.03 41.9298 86.62 82.19 3.45
## 18 18 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## 19 71 96.92 92.31 0.04 81.3657 75.15 87.52 3.90
## 20 72 95.00 94.29 0.00 66.0589 45.59 87.69 3.94
## 21 73 96.88 92.50 0.06 73.2582 55.68 83.71 3.78
## 22 74 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## 23 75 88.61 92.16 0.05 82.0101 98.30 79.68 3.80
## 24 76 94.44 89.13 0.03 76.8707 97.78 90.94 3.55
## 25 77 84.91 96.67 0.12 73.5336 76.25 87.10 3.56
## 26 78 93.44 95.00 0.08 57.4074 56.76 87.43 3.97
## 27 79 93.02 91.30 0.00 81.3433 90.18 88.42 3.58
plot_intro(data,ggtheme = theme_pubr())
## Distribution
plot_density(data[-1],binary_as_factor = FALSE,
geom_density_args = list(fill="red"),
nrow = 3,
ncol = 3,
ggtheme = theme_pubr(base_size = 9))
# Boxplot of each predictor
tmp <- par(mfrow=c(3,4))
names <- colnames(data[-1])
for(i in names){
boxplot(data[-1][[i]], horizontal=T, col='red')
title(i)
}
par(tmp)
## Outlier
# Check a column for outliers; here 'data' is the data frame and x3 the column being checked
# Compute Q1 (first quartile) and Q3 (third quartile)
Q1 <- quantile(data$x3, 0.25)
Q3 <- quantile(data$x3, 0.75)
# Compute the IQR
IQR_value <- IQR(data$x3)
# Lower and upper bounds for outliers
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value
# Extract the outliers
outliers <- data[data$x3 < lower_bound | data$x3 > upper_bound, ]
# View the outliers
print(outliers)
## R102 x1 x2 x3 x4 x5 x6 x7
## 14 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
## 22 74 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
# Repeat the outlier check for x5
# Compute Q1 (first quartile) and Q3 (third quartile)
Q1 <- quantile(data$x5, 0.25)
Q3 <- quantile(data$x5, 0.75)
# Compute the IQR
IQR_value <- IQR(data$x5)
# Lower and upper bounds for outliers
lower_bound <- Q1 - 1.5 * IQR_value
upper_bound <- Q3 + 1.5 * IQR_value
# Extract the outliers
outliers <- data[data$x5 < lower_bound | data$x5 > upper_bound, ]
# View the outliers
print(outliers)
## R102 x1 x2 x3 x4 x5 x6 x7
## 20 72 95 94.29 0 66.0589 45.59 87.69 3.94
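The same 1.5 * IQR rule can be applied to every predictor at once. A minimal sketch (the helper name iqr_outliers is ours, not part of the original analysis):
# Sketch: flag IQR-rule outliers for each predictor and list the affected rows
iqr_outliers <- function(df, col) {
  q <- quantile(df[[col]], c(0.25, 0.75))
  iqr <- q[2] - q[1]
  df[df[[col]] < q[1] - 1.5 * iqr | df[[col]] > q[2] + 1.5 * iqr, ]
}
lapply(setNames(nm = paste0("x", 1:7)), function(v) iqr_outliers(data, v))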
library(skimr)
## Warning: package 'skimr' was built under R version 4.3.2
skim_without_charts(data = data[-1])
Name | data[-1] |
Number of rows | 27 |
Number of columns | 7 |
_______________________ | |
Column type frequency: | |
numeric | 7 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 |
---|---|---|---|---|---|---|---|---|---|
x1 | 0 | 1 | 92.10 | 3.88 | 84.91 | 89.13 | 92.59 | 94.80 | 98.72 |
x2 | 0 | 1 | 94.22 | 3.26 | 88.14 | 91.89 | 95.00 | 96.55 | 100.00 |
x3 | 0 | 1 | 0.05 | 0.05 | 0.00 | 0.03 | 0.04 | 0.07 | 0.20 |
x4 | 0 | 1 | 59.43 | 16.30 | 31.65 | 46.83 | 57.75 | 73.40 | 86.89 |
x5 | 0 | 1 | 80.31 | 14.78 | 45.59 | 75.10 | 83.12 | 90.52 | 98.30 |
x6 | 0 | 1 | 81.86 | 6.38 | 68.83 | 78.66 | 82.19 | 87.26 | 90.94 |
x7 | 0 | 1 | 3.70 | 0.23 | 3.18 | 3.58 | 3.72 | 3.88 | 4.10 |
data_kota$kota[data_kota$x1 == min(data_kota$x1)]
## [1] "Kota Cimahi"
data_kota$kota[data_kota$x2 == min(data_kota$x2)]
## [1] "Tasikmalaya"
data_kota$kota[data_kota$x3 == min(data_kota$x3)]
## [1] "Ciamis" "Cirebon" "Majalengka" "Pangandaran"
## [5] "Kota Sukabumi" "Kota Banjar"
data_kota$kota[data_kota$x4 == min(data_kota$x4)]
## [1] "Cianjur"
data_kota$kota[data_kota$x5 == min(data_kota$x5)]
## [1] "Kota Sukabumi"
data_kota$kota[data_kota$x6 == min(data_kota$x6)]
## [1] "Sukabumi"
data_kota$kota[data_kota$x7 == min(data_kota$x7)]
## [1] "Pangandaran"
data_kota$kota[data_kota$x1 == max(data_kota$x1)]
## [1] "Cirebon"
data_kota$kota[data_kota$x2 == max(data_kota$x2)]
## [1] "Majalengka"
data_kota$kota[data_kota$x3 == max(data_kota$x3)]
## [1] "Kota Cirebon"
data_kota$kota[data_kota$x4 == max(data_kota$x4)]
## [1] "Kota Cirebon"
data_kota$kota[data_kota$x5 == max(data_kota$x5)]
## [1] "Kota Bekasi"
data_kota$kota[data_kota$x6 == max(data_kota$x6)]
## [1] "Kota Depok"
data_kota$kota[data_kota$x7 == max(data_kota$x7)]
## [1] "Garut"
# District/city attaining the minimum of each variable
min_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) data_kota$kota[which.min(x)])
# District/city attaining the maximum of each variable
max_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) data_kota$kota[which.max(x)])
# Build a tidy, informative data frame
result_df <- data.frame(
Variable = paste0("x", 1:7),
Min_Kota = min_kota,
Max_Kota = max_kota
)
# Handle ties: collapse every district/city that attains the minimum (note: 'data' has no 'kota' column, so data_kota is used here)
min_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) paste(data_kota$kota[which(x == min(x))], collapse = ", "))
# Handle ties: collapse every district/city that attains the maximum
max_kota <- sapply(data_kota[, paste0("x", 1:7)], function(x) paste(data_kota$kota[which(x == max(x))], collapse = ", "))
# Build a tidy, informative data frame
result_df <- data.frame(
Min_Kota = min_kota,
Max_Kota = max_kota
)
# Use descriptive row names
rownames(result_df) <- c("Persentase_ASI_Eksklusif",
"Persentase_MP_ASI",
"Persentase_Imunisasi_Lengkap",
"Persentase_Jaminan_Kesehatan",
"Persentase_Akses_Sanitasi_Layak",
"Persentase_Akses_Air_Layak",
"Persentase_Konsumsi_Gizi")
# Display the result
print(result_df)
##                                                                                              Min_Kota     Max_Kota
## Persentase_ASI_Eksklusif                                                                  Kota Cimahi      Cirebon
## Persentase_MP_ASI                                                                         Tasikmalaya   Majalengka
## Persentase_Imunisasi_Lengkap    Ciamis, Cirebon, Majalengka, Pangandaran, Kota Sukabumi, Kota Banjar Kota Cirebon
## Persentase_Jaminan_Kesehatan                                                                  Cianjur Kota Cirebon
## Persentase_Akses_Sanitasi_Layak                                                         Kota Sukabumi  Kota Bekasi
## Persentase_Akses_Air_Layak                                                                   Sukabumi   Kota Depok
## Persentase_Konsumsi_Gizi                                                                  Pangandaran        Garut
# Correlation heatmap (Spearman)
plot_correlation(data = data[-1],
type = "all",
cor_args = list(method="spearman"),
ggtheme = theme_classic(),
theme_config = list(legend.position = "none",
axis.text.x=element_text(angle = 90)))
None of the predictors are strongly correlated with one another.
require(psych)
## Loading required package: psych
## Warning: package 'psych' was built under R version 4.3.3
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:kernlab':
##
## alpha
pairs.panels(data[-1], method = "spearman")
# column wise missing values
colSums(is.na(data))
## R102 x1 x2 x3 x4 x5 x6 x7
## 0 0 0 0 0 0 0 0
# Handle missing values
data <- na.omit(data)
There are no missing values.
data_for_clustering <- data[, c("x1", "x2", "x3", "x4","x5","x6","x7")]
tic()
set.seed(123)
fviz_nbclust(
x = data_for_clustering,
FUNcluster = hcut,
method = "wss",
hc_method = "complete",
hc_func = "hclust",
k.max = 25
)
The last sharp drop in the total within-cluster sum of squares occurs between 3 and 4 clusters, so 4 clusters is likely the optimal number.
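The exact WSS values behind the elbow plot can be read from the ggplot object returned by fviz_nbclust. A minimal sketch, assuming the same data and hierarchical clustering setup as above:
# Sketch: inspect the WSS values underlying the elbow plot
p_wss <- fviz_nbclust(data_for_clustering, FUNcluster = hcut, method = "wss",
                      hc_method = "complete", k.max = 10)
p_wss$data   # number of clusters and the corresponding total within-cluster sum of squares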
metode_agg <- c("single","complete",
"average","ward.D","median","centroid",
"mcquitty")
map(metode_agg, function(i)
fviz_nbclust(
x = data_for_clustering,
FUNcluster = hcut,
method = "silhouette",
hc_method = i,
hc_func = "hclust",
k.max = 25
)+
ggtitle(str_c("Optimal number of clusters based on HC with ",i," linkage"))
)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
All of the silhouette plots point to an optimum at 4 clusters.
For these two reasons, 4 clusters is chosen as the optimal number.
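As a numeric cross-check (a sketch, not part of the original run), the average silhouette width at k = 4 can be computed directly for each linkage:
# Sketch: average silhouette width at k = 4 for every linkage method tested above
sil_at_4 <- sapply(metode_agg, function(met) {
  hc <- hcut(data_for_clustering, k = 4, hc_func = "hclust", hc_method = met)
  mean(silhouette(hc$cluster, dist(data_for_clustering))[, "sil_width"])
})
round(sil_at_4, 3)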
library(ppclust)
## Warning: package 'ppclust' was built under R version 4.3.3
##
## Attaching package: 'ppclust'
## The following object is masked from 'package:psych':
##
## pca
f_cmeans <- fcm(data_for_clustering, centers=4, m = 2)
as.data.frame(f_cmeans$u)
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.58502272 0.13347114 0.16885106 0.112655073
## 2 0.65372671 0.08732070 0.08862539 0.170327195
## 3 0.08214835 0.02110022 0.03964624 0.857105182
## 4 0.54088852 0.08578773 0.14243250 0.230891241
## 5 0.06208661 0.02228773 0.07253836 0.843087294
## 6 0.05161885 0.02021903 0.05304843 0.875113689
## 7 0.82383695 0.05169384 0.05110991 0.073359299
## 8 0.78520483 0.11751085 0.04731261 0.049971717
## 9 0.74203820 0.14307740 0.05932692 0.055557476
## 10 0.73268206 0.06298265 0.07280334 0.131531945
## 11 0.65676884 0.18779989 0.07630671 0.079124560
## 12 0.79054528 0.07882846 0.04776771 0.082858557
## 13 0.78232484 0.06294722 0.04910546 0.105622483
## 14 0.56893551 0.25165701 0.08583475 0.093572731
## 15 0.58261147 0.19297657 0.12276917 0.101642785
## 16 0.03686169 0.92659219 0.02443773 0.012108398
## 17 0.80036653 0.04981330 0.04594886 0.103871320
## 18 0.29024945 0.52969310 0.11508916 0.064968285
## 19 0.14261944 0.54197636 0.25096090 0.064443306
## 20 0.04277214 0.03681621 0.86104995 0.059361693
## 21 0.03712983 0.04121985 0.88704333 0.034606991
## 22 0.05326938 0.89054334 0.03722535 0.018961925
## 23 0.07772950 0.85842268 0.03935149 0.024496339
## 24 0.09283683 0.83661374 0.04401085 0.026538569
## 25 0.21113591 0.46320116 0.24695258 0.078710352
## 26 0.06782784 0.03931397 0.79758463 0.095273562
## 27 0.01631766 0.96730174 0.01110831 0.005272283
summary(f_cmeans)
## Summary for 'f_cmeans'
##
## Number of data objects: 27
##
## Number of clusters: 4
##
## Crisp clustering vector:
## [1] 1 1 4 1 4 4 1 1 1 1 1 1 1 1 1 2 1 2 2 3 3 2 2 2 2 3 2
##
## Initial cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 98.72 96.00 0.00 58.5742 89.78 80.58 4.00
## Cluster 2 94.44 89.13 0.03 76.8707 97.78 90.94 3.55
## Cluster 3 96.88 92.50 0.06 73.2582 55.68 83.71 3.78
## Cluster 4 89.47 96.77 0.09 61.7424 81.73 73.53 3.86
##
## Final cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 92.33050 95.66647 0.04160781 51.31007 86.68345 80.49127 3.646332
## Cluster 2 91.46070 93.01288 0.05946008 79.71876 91.20032 85.63463 3.651362
## Cluster 3 94.81695 93.92280 0.04767975 66.13097 55.30646 85.92202 3.869817
## Cluster 4 92.23131 92.28385 0.06265749 35.35767 62.44590 74.31127 3.824791
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3
## Cluster 2 861.7083
## Cluster 3 1242.9403 1485.2203
## Cluster 4 891.6147 2924.0972 1142.1510
##
## Difference between the initial and final cluster prototypes
## x1 x2 x3 x4 x5 x6
## Cluster 1 -6.389501 -0.3335303 0.04160781 -7.264125 -3.0965511 -0.0887341
## Cluster 2 -2.979302 3.8828798 0.02946008 2.848063 -6.5796843 -5.3053701
## Cluster 3 -2.063047 1.4228019 -0.01232026 -7.127235 -0.3735438 2.2120176
## Cluster 4 2.761305 -4.4861466 -0.02734251 -26.384733 -19.2841002 0.7812713
## x7
## Cluster 1 -0.35366813
## Cluster 2 0.10136153
## Cluster 3 0.08981725
## Cluster 4 -0.03520912
##
## Root Mean Squared Deviations (RMSD): 18.47603
## Mean Absolute Deviation (MAD): 186.1208
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.58502272 0.13347114 0.16885106 0.1126551
## 2 0.65372671 0.08732070 0.08862539 0.1703272
## 3 0.08214836 0.02110022 0.03964624 0.8571052
## 4 0.54088852 0.08578773 0.14243250 0.2308912
## 5 0.06208661 0.02228773 0.07253836 0.8430873
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 23 0.07772950 0.85842268 0.03935149 0.024496339
## 24 0.09283683 0.83661374 0.04401085 0.026538569
## 25 0.21113591 0.46320116 0.24695258 0.078710352
## 26 0.06782784 0.03931397 0.79758463 0.095273562
## 27 0.01631766 0.96730174 0.01110831 0.005272283
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 13 0.5408885 0.5850227 0.6957656 0.7326821 0.7852048 0.8238370
## Cluster 2 8 0.4632012 0.5389055 0.7517930 0.8475182 0.8995556 0.9673017
## Cluster 3 3 0.7975846 0.8293173 0.8485593 0.8610499 0.8740466 0.8870433
## Cluster 4 3 0.8430873 0.8500962 0.8584354 0.8571052 0.8661094 0.8751137
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.6106878 0.4809171
##
## Within cluster sum of squares by cluster:
## 1 2 3 4
## 1875.0622 1118.6255 221.0829 223.7435
## (between_SS / total_SS = 75.68%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
tic()
library(ppclust)
f_cmeans_2 <- fcm(data_for_clustering, centers=4, m = 2, nstart = 10 )
as.data.frame(f_cmeans_2$u)
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.13347114 0.58502272 0.16885106 0.112655073
## 2 0.08732070 0.65372671 0.08862539 0.170327195
## 3 0.02110022 0.08214835 0.03964624 0.857105182
## 4 0.08578773 0.54088852 0.14243250 0.230891241
## 5 0.02228773 0.06208661 0.07253836 0.843087294
## 6 0.02021903 0.05161885 0.05304843 0.875113689
## 7 0.05169384 0.82383695 0.05110991 0.073359299
## 8 0.11751085 0.78520483 0.04731261 0.049971717
## 9 0.14307740 0.74203820 0.05932692 0.055557476
## 10 0.06298265 0.73268206 0.07280334 0.131531945
## 11 0.18779989 0.65676884 0.07630671 0.079124560
## 12 0.07882846 0.79054528 0.04776771 0.082858557
## 13 0.06294722 0.78232484 0.04910546 0.105622483
## 14 0.25165701 0.56893551 0.08583475 0.093572731
## 15 0.19297657 0.58261147 0.12276917 0.101642785
## 16 0.92659219 0.03686169 0.02443773 0.012108398
## 17 0.04981330 0.80036653 0.04594886 0.103871320
## 18 0.52969310 0.29024945 0.11508916 0.064968285
## 19 0.54197636 0.14261944 0.25096090 0.064443306
## 20 0.03681621 0.04277214 0.86104995 0.059361693
## 21 0.04121985 0.03712983 0.88704333 0.034606991
## 22 0.89054334 0.05326938 0.03722535 0.018961925
## 23 0.85842268 0.07772950 0.03935149 0.024496339
## 24 0.83661374 0.09283683 0.04401085 0.026538569
## 25 0.46320116 0.21113591 0.24695258 0.078710352
## 26 0.03931397 0.06782784 0.79758463 0.095273562
## 27 0.96730174 0.01631766 0.01110831 0.005272283
toc()
## 3.31 sec elapsed
summary(f_cmeans_2)
## Summary for 'f_cmeans_2'
##
## Number of data objects: 27
##
## Number of clusters: 4
##
## Crisp clustering vector:
## [1] 2 2 4 2 4 4 2 2 2 2 2 2 2 2 2 1 2 1 1 3 3 1 1 1 1 3 1
##
## Initial cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 93.33 90.12 0.09 31.6522 69.67 73.93 3.86
## Cluster 2 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## Cluster 3 97.59 97.96 0.00 48.6842 83.12 84.47 3.23
## Cluster 4 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
##
## Final cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 91.46070 93.01288 0.05946008 79.71876 91.20032 85.63463 3.651362
## Cluster 2 92.33050 95.66647 0.04160781 51.31007 86.68345 80.49127 3.646332
## Cluster 3 94.81695 93.92280 0.04767975 66.13097 55.30646 85.92202 3.869817
## Cluster 4 92.23131 92.28385 0.06265749 35.35767 62.44590 74.31127 3.824791
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3
## Cluster 2 861.7083
## Cluster 3 1485.2203 1242.9403
## Cluster 4 2924.0972 891.6147 1142.1510
##
## Difference between the initial and final cluster prototypes
## x1 x2 x3 x4 x5 x6
## Cluster 1 -1.869302 2.892880 -0.03053992 48.06656 21.5303157 11.704630
## Cluster 2 6.620499 4.756470 0.04160781 -16.18113 0.3734489 -9.888734
## Cluster 3 -2.773047 -4.037198 0.04767975 17.44677 -27.8135438 1.452018
## Cluster 4 4.581305 -1.596147 -0.12734251 -25.34053 -31.7141002 4.241271
## x7
## Cluster 1 -0.2086385
## Cluster 2 0.4663319
## Cluster 3 0.6398172
## Cluster 4 -0.1552091
##
## Root Mean Squared Deviations (RMSD): 39.1895
## Mean Absolute Deviation (MAD): 431.5449
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.13347114 0.58502272 0.16885106 0.1126551
## 2 0.08732070 0.65372671 0.08862539 0.1703272
## 3 0.02110022 0.08214836 0.03964624 0.8571052
## 4 0.08578773 0.54088852 0.14243250 0.2308912
## 5 0.02228773 0.06208661 0.07253836 0.8430873
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 23 0.85842268 0.07772950 0.03935149 0.024496339
## 24 0.83661374 0.09283683 0.04401085 0.026538569
## 25 0.46320116 0.21113591 0.24695258 0.078710352
## 26 0.03931397 0.06782784 0.79758463 0.095273562
## 27 0.96730174 0.01631766 0.01110831 0.005272283
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 8 0.4632012 0.5389055 0.7517930 0.8475182 0.8995556 0.9673017
## Cluster 2 13 0.5408885 0.5850227 0.6957656 0.7326821 0.7852048 0.8238370
## Cluster 3 3 0.7975846 0.8293173 0.8485593 0.8610499 0.8740466 0.8870433
## Cluster 4 3 0.8430873 0.8500962 0.8584354 0.8571052 0.8661094 0.8751137
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.6106878 0.4809171
##
## Within cluster sum of squares by cluster:
## 1 2 3 4
## 1118.6255 1875.0622 221.0829 223.7435
## (between_SS / total_SS = 75.68%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
res.fcm4 <- ppclust2(f_cmeans, "fclust")
idxsf_1 <- SIL.F(res.fcm4$Xca, res.fcm4$U, alpha=1)
idxpe_1 <- PE(res.fcm4$U)
idxpc_1 <- PC(res.fcm4$U)
idxmpc_1 <- MPC(res.fcm4$U)
res.fcm5 <- ppclust2(f_cmeans_2, "fclust")
idxsf_2 <- SIL.F(res.fcm5$Xca, res.fcm5$U, alpha=1)
idxpe_2 <- PE(res.fcm5$U)
idxpc_2 <- PC(res.fcm5$U)
idxmpc_2 <- MPC(res.fcm5$U)
data.frame("Partition Entropy" = c(idxpe_1,idxpe_2),
"Partition Coefficient" = c(idxpc_1,idxpc_2),
"Modified Partition Coefficient" = c(idxmpc_1,idxmpc_2),
"Fuzzy Silhouette Index" = c(idxsf_1,idxsf_2))
## Partition.Entropy Partition.Coefficient Modified.Partition.Coefficient
## 1 0.7693229 0.6106878 0.4809171
## 2 0.7693229 0.6106878 0.4809171
## Fuzzy.Silhouette.Index
## 1 0.7105211
## 2 0.7105211
Increasing nstart does not change the evaluation metrics; only the cluster labels are permuted.
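A quick way to confirm this (a sketch, assuming f_cmeans and f_cmeans_2 from above): the two membership matrices coincide once the columns of the second are matched to the first.
# Sketch: match the columns of the second membership matrix to the first, then compare
perm <- apply(cor(f_cmeans$u, f_cmeans_2$u), 1, which.max)
max(abs(f_cmeans$u - f_cmeans_2$u[, perm]))  # close to 0 if the solutions differ only by relabelling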
# library(ppclust)
# library(tictoc)
#
# # Initialise the values of m and the numbers of clusters to test
# m_values <- seq(1, 4, by = 0.2)
# cluster_values <- 2:6  # e.g. test between 2 and 6 clusters
#
# # Data frame to store the results of each iteration
# results <- data.frame(m = numeric(),
#                       clusters = numeric(),
#                       idxsf_2 = numeric(),
#                       idxpe_2 = numeric(),
#                       idxpc_2 = numeric(),
#                       idxmpc_2 = numeric())
#
# tic()
#
# # Loop over each number of clusters
# for (clusters in cluster_values) {
#   # Loop over each value of m
#   for (m_value in m_values) {
#     # Run fuzzy c-means clustering
#     f_cmeans_2 <- fcm(data_for_clustering, centers = clusters, m = m_value, nstart = 10)
#
#     # Convert the result to an 'fclust' object
#     res.fcm5 <- ppclust2(f_cmeans_2, "fclust")
#
#     # Compute the clustering validity indices
#     idxsf_2 <- SIL.F(res.fcm5$Xca, res.fcm5$U, alpha = 1)
#     idxpe_2 <- PE(res.fcm5$U)
#     idxpc_2 <- PC(res.fcm5$U)
#     idxmpc_2 <- MPC(res.fcm5$U)
#
#     # Append the results to the data frame
#     results <- rbind(results, data.frame(m = m_value,
#                                          clusters = clusters,
#                                          idxsf_2 = idxsf_2,
#                                          idxpe_2 = idxpe_2,
#                                          idxpc_2 = idxpc_2,
#                                          idxmpc_2 = idxmpc_2))
#   }
# }
#
# toc()  # Elapsed time
#
# # Display the results
# print(results)
library(ppclust)
library(tictoc)
# Initialise the values of m to test (the number of clusters is fixed at 4)
m_values <- seq(1, 4, by = 0.2)
# Data frame to store the results of each iteration
results <- data.frame(m = numeric(),
                      idxsf_2 = numeric(),
                      idxpe_2 = numeric(),
                      idxpc_2 = numeric(),
                      idxmpc_2 = numeric())
tic()
# Loop over each value of m
for (m_value in m_values) {
  # Run fuzzy c-means clustering with 4 clusters
  f_cmeans_2 <- fcm(data_for_clustering, centers = 4, m = m_value, nstart = 10)
  # Convert the result to an 'fclust' object
  res.fcm5 <- ppclust2(f_cmeans_2, "fclust")
  # Compute the clustering validity indices
  idxsf_2 <- SIL.F(res.fcm5$Xca, res.fcm5$U, alpha = 1)
  idxpe_2 <- PE(res.fcm5$U)
  idxpc_2 <- PC(res.fcm5$U)
  idxmpc_2 <- MPC(res.fcm5$U)
  # Append the results to the data frame
  results <- rbind(results, data.frame(m = m_value,
                                       idxsf_2 = idxsf_2,
                                       idxpe_2 = idxpe_2,
                                       idxpc_2 = idxpc_2,
                                       idxmpc_2 = idxmpc_2))
}
toc()  # Elapsed time
## 89.43 sec elapsed
# Display the results
print(results)
## m idxsf_2 idxpe_2 idxpc_2 idxmpc_2
## 1 1.0 0.6636159 2.400990e-14 1.0000000 1.00000000
## 2 1.2 0.6644244 1.136448e-02 0.9963459 0.99512790
## 3 1.4 0.6737516 1.389482e-01 0.9401275 0.92017005
## 4 1.6 0.6886347 3.621894e-01 0.8295895 0.77278600
## 5 1.8 0.7016385 5.841946e-01 0.7130106 0.61734747
## 6 2.0 0.7105211 7.693229e-01 0.6106878 0.48091706
## 7 2.2 0.7161646 9.143268e-01 0.5272310 0.36964130
## 8 2.4 0.7195712 1.025038e+00 0.4618607 0.28248091
## 9 2.6 0.5450682 1.137482e+00 0.3898330 0.18644404
## 10 2.8 0.5480200 1.184082e+00 0.3633487 0.15113156
## 11 3.0 0.5512023 1.219654e+00 0.3431285 0.12417132
## 12 3.2 0.5546791 1.247273e+00 0.3274450 0.10326001
## 13 3.4 0.5585833 1.269035e+00 0.3151071 0.08680943
## 14 3.6 0.5631297 1.286417e+00 0.3052696 0.07369279
## 15 3.8 0.5686994 1.300483e+00 0.2973231 0.06309741
## 16 4.0 0.5761823 1.312010e+00 0.2908221 0.05442946
skim_without_charts(data = results)
Name | results |
Number of rows | 16 |
Number of columns | 5 |
_______________________ | |
Column type frequency: | |
numeric | 5 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 |
---|---|---|---|---|---|---|---|---|---|
m | 0 | 1 | 2.50 | 0.95 | 1.00 | 1.75 | 2.50 | 3.25 | 4.00 |
idxsf_2 | 0 | 1 | 0.63 | 0.07 | 0.55 | 0.56 | 0.62 | 0.69 | 0.72 |
idxpe_2 | 0 | 1 | 0.86 | 0.49 | 0.00 | 0.53 | 1.08 | 1.25 | 1.31 |
idxpc_2 | 0 | 1 | 0.54 | 0.27 | 0.29 | 0.32 | 0.43 | 0.74 | 1.00 |
idxmpc_2 | 0 | 1 | 0.39 | 0.36 | 0.05 | 0.10 | 0.23 | 0.66 | 1.00 |
#### Creating new variables for evaluation
# z-score standardisation (despite the name, this centres and scales; it is not min-max normalisation)
normalize <- function(x) {
  return((x - mean(x)) / sd(x))
}
# Standardise all indices
results$idxsf_2_norm <- normalize(results$idxsf_2)
results$idxpe_2_norm <- normalize(-results$idxpe_2)  # sign flipped because smaller PE is better
results$idxpc_2_norm <- normalize(results$idxpc_2)
results$idxmpc_2_norm <- normalize(results$idxmpc_2)
# Combined score: the mean of the four standardised indices
results$combined_score <- rowMeans(results[, c("idxsf_2_norm", "idxpe_2_norm", "idxpc_2_norm", "idxmpc_2_norm")])
# Weighted version, e.g. giving a higher weight to the fuzzy silhouette (SIL.F)
results$combined_score_weighted <- (0.4 * results$idxsf_2_norm) +
  (0.2 * results$idxpe_2_norm) +
  (0.2 * results$idxpc_2_norm) +
  (0.2 * results$idxmpc_2_norm)
results %>% arrange(desc(combined_score_weighted))
## m idxsf_2 idxpe_2 idxpc_2 idxmpc_2 idxsf_2_norm idxpe_2_norm
## 1 1.0 0.6636159 2.400990e-14 1.0000000 1.00000000 0.5376437 1.7678143
## 2 1.2 0.6644244 1.136448e-02 0.9963459 0.99512790 0.5489712 1.7444566
## 3 1.4 0.6737516 1.389482e-01 0.9401275 0.92017005 0.6796542 1.4822304
## 4 1.6 0.6886347 3.621894e-01 0.8295895 0.77278600 0.8881801 1.0233969
## 5 1.8 0.7016385 5.841946e-01 0.7130106 0.61734747 1.0703763 0.5671039
## 6 2.0 0.7105211 7.693229e-01 0.6106878 0.48091706 1.1948299 0.1866047
## 7 2.2 0.7161646 9.143268e-01 0.5272310 0.36964130 1.2739020 -0.1114254
## 8 2.4 0.7195712 1.025038e+00 0.4618607 0.28248091 1.3216309 -0.3389723
## 9 2.6 0.5450682 1.137482e+00 0.3898330 0.18644404 -1.1233258 -0.5700824
## 10 2.8 0.5480200 1.184082e+00 0.3633487 0.15113156 -1.0819673 -0.6658614
## 11 4.0 0.5761823 1.312010e+00 0.2908221 0.05442946 -0.6873863 -0.9287940
## 12 3.0 0.5512023 1.219654e+00 0.3431285 0.12417132 -1.0373806 -0.7389716
## 13 3.8 0.5686994 1.300483e+00 0.2973231 0.06309741 -0.7922297 -0.9051018
## 14 3.2 0.5546791 1.247273e+00 0.3274450 0.10326001 -0.9886668 -0.7957389
## 15 3.6 0.5631297 1.286417e+00 0.3052696 0.07369279 -0.8702664 -0.8761922
## 16 3.4 0.5585833 1.269035e+00 0.3151071 0.08680943 -0.9339655 -0.8404669
## idxpc_2_norm idxmpc_2_norm combined_score combined_score_weighted
## 1 1.70642477 1.70642477 1.42957689 1.2511903
## 2 1.69273726 1.69273726 1.41972557 1.2455747
## 3 1.48215328 1.48215328 1.28154780 1.1611691
## 4 1.06809769 1.06809769 1.01194310 0.9871905
## 5 0.63141410 0.63141410 0.72507711 0.7941369
## 6 0.24813126 0.24813126 0.46942429 0.6145054
## 7 -0.06448299 -0.06448299 0.25837766 0.4614825
## 8 -0.30934837 -0.30934837 0.09099045 0.3371185
## 9 -0.57915099 -0.57915099 -0.71292753 -0.7950072
## 10 -0.67835663 -0.67835663 -0.77613549 -0.8373019
## 11 -0.95002815 -0.95002815 -0.87905916 -0.8407246
## 12 -0.75409779 -0.75409779 -0.82113694 -0.8643857
## 13 -0.92567671 -0.92567671 -0.88717122 -0.8681829
## 14 -0.81284531 -0.81284531 -0.85252408 -0.8797526
## 15 -0.89591041 -0.89591041 -0.88456985 -0.8817092
## 16 -0.85906101 -0.85906101 -0.87313861 -0.8853040
require(psych)
# Correlation heatmap (Spearman) of m and the validity indices
plot_correlation(data = results[c(-7:-12)],
type = "all",
cor_args = list(method="spearman"),
ggtheme = theme_classic(),
theme_config = list(legend.position = "none",
axis.text.x=element_text(angle = 90)))
# Find the row with the highest weighted combined score
best_combination <- results[which.max(results$combined_score_weighted), ]
print(best_combination)
## m idxsf_2 idxpe_2 idxpc_2 idxmpc_2 idxsf_2_norm idxpe_2_norm
## 1 1 0.6636159 2.40099e-14 1 1 0.5376437 1.767814
## idxpc_2_norm idxmpc_2_norm combined_score combined_score_weighted
## 1 1.706425 1.706425 1.429577 1.25119
Fuzzy c-means with 4 clusters and fuzzifier m = 1 achieves the best combined score. Since m = 1 corresponds to a crisp (non-fuzzy) partition, the next value on the grid, m = 1.2, is used below for visualisation and interpretation.
f_cmeans_fix <- fcm(data_for_clustering, centers=4, m = 1.2, nstart = 10)
res.fcm_fix <- ppclust2(f_cmeans_fix, "fclust")
idxsf_fix <- SIL.F(res.fcm_fix$Xca, res.fcm_fix$U, alpha=1)
idxpe_fix <- PE(res.fcm_fix$U)
idxpc_fix <- PC(res.fcm_fix$U)
idxmpc_fix <- MPC(res.fcm_fix$U)
data.frame("Partition Entropy" = c(idxpe_fix),
"Partition Coefficient" = c(idxpc_fix),
"Modified Partition Coefficient" = c(idxmpc_fix),
"Fuzzy Silhouette Index" = c(idxsf_fix))
## Partition.Entropy Partition.Coefficient Modified.Partition.Coefficient
## 1 0.01136448 0.9963459 0.9951279
## Fuzzy.Silhouette.Index
## 1 0.6644244
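With m = 1.2 the memberships are nearly crisp; a quick check (sketch):
# Sketch: highest membership degree of each observation under m = 1.2
summary(apply(f_cmeans_fix$u, 1, max))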
# Load ggplot2 package
library(ggplot2)
library(cowplot)
## Warning: package 'cowplot' was built under R version 4.3.3
##
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
##
## get_legend
## The following object is masked from 'package:lubridate':
##
## stamp
# Scatterplots of each validity index against the fuzzifier m
# Plot 1: idxsf_2 vs m
plot1 <- ggplot(results, aes(x = m, y = idxsf_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Silhouette Index vs m",
x = "Derajat Fuzzy", y = "Silhouette Index") +
theme_cowplot()
# Plot 2: idxpe_2 vs m
plot2 <- ggplot(results, aes(x = m, y = idxpe_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Partition Entropy vs m",
x = "Derajat Fuzzy", y = "Partition Entropy") +
theme_cowplot()
# Plot 3: idxpc_2 vs m
plot3 <- ggplot(results, aes(x = m, y = idxpc_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Partition Coefficient vs m",
x = "Derajat Fuzzy", y = "Partition Coefficient") +
theme_cowplot()
# Plot 4: idxmpc_2 vs m
plot4 <- ggplot(results, aes(x = m, y = idxmpc_2)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Modified Partition Coefficient vs m",
x = "Derajat Fuzzy", y = "Modified Partition Coefficient") +
theme_cowplot()
# Display the plots
print(plot1)
print(plot2)
print(plot3)
print(plot4)
# Plot 5: combined_score vs m
plot5 <- ggplot(results, aes(x = m, y = combined_score)) +
geom_point() +
geom_line() +
labs(title = "Scatterplot of Combined Score vs m",
x = "Derajat Fuzzy", y = "Combined Score") +
theme_classic()
plot5
ggsave("plot1.png", plot = plot1, width = 10, height = 8, dpi = 300)
ggsave("plot2.png", plot = plot2, width = 10, height = 8, dpi = 300)
ggsave("plot3.png", plot = plot3, width = 10, height = 8, dpi = 300)
ggsave("plot4.png", plot = plot4, width = 10, height = 8, dpi = 300)
ggsave("plot5.png", plot = plot5, width = 10, height = 8, dpi = 300)
plotcluster(f_cmeans_fix, cp=1, trans=TRUE)
res.fcm_fix2 <- ppclust2(f_cmeans_fix, "kmeans")
fviz_cluster(res.fcm_fix2, data = data_for_clustering,
ellipse.type = "convex",
palette = "jco",
repel = TRUE)
f_cmeans_fix3 <- ppclust2(f_cmeans_fix, "fanny")
cluster::clusplot(scale(data_for_clustering), f_cmeans_fix3$cluster,
main = "Cluster plot of anemia data set",
color=TRUE, labels = 2, lines = 2, cex=1)
sil2 <- silhouette(x = f_cmeans_fix$cluster,
dist = dist(x =data_for_clustering ,method = "euclidean"))
silhouette <- fviz_silhouette(sil2,print.summary = FALSE)+
scale_color_manual(values = c("black","black","black", "black"))+
# get_palette comes from the ggpubr package
scale_fill_manual(values = get_palette("jco",k=4))+
theme_classic()+
theme(legend.position = "top")
ggsave("Silhouette.png", plot = silhouette, width = 10, height = 8, dpi = 300)
No observations are misclustered, since none has a negative silhouette width.
sil2 |>
as.data.frame() |>
mutate(obs=1:nrow(sil2)) |>
relocate(obs) |>
filter(sil_width<0) |>
arrange(sil_width)
## [1] obs cluster neighbor sil_width
## <0 rows> (or 0-length row.names)
corrected_cluster2 <- sil2 |>
as.data.frame() |>
mutate(cluster=if_else(sil_width<0,neighbor,cluster))
library(Rtsne)
## Warning: package 'Rtsne' was built under R version 4.3.3
tic()
set.seed(123)
rtsne_cc2 <- Rtsne(data, dims = 2,perplexity = 5)
toc()
## 0.05 sec elapsed
tsne_df2 <- rtsne_cc2$Y %>%
as.data.frame() %>%
rename(
tsne1 = "V1",
tsne2 = "V2"
) |>
mutate(cluster= as.character(f_cmeans_fix$cluster),
corrected_cluster=as.character(corrected_cluster2$cluster)) %>%
mutate(Kabupaten=data$R102)
ggscatter(tsne_df2, x = "tsne1",
y = "tsne2",
label = "Kabupaten",
# repel = TRUE increases running time when there are very many observations
repel = TRUE # so the district labels do not overlap
,
color = "cluster",
palette = "jco",
title = "t-SNE cluster plot of the FCM solution")
t-SNE visualisation of the cluster assignments.
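Note that the Rtsne call above embeds the full data frame, including the R102 district code. A sketch that embeds only the seven indicators (the object name rtsne_vars is ours):
# Sketch: t-SNE on the seven indicators only, excluding the R102 identifier
set.seed(123)
rtsne_vars <- Rtsne(data_for_clustering, dims = 2, perplexity = 5)
# the plotting code above can be reused with rtsne_vars$Y in place of rtsne_cc2$Y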
data_for_clustering |>
mutate(cluster=f_cmeans_fix$cluster) |>
group_by(cluster) |>
summarise(across(everything(),mean))
## # A tibble: 4 × 8
## cluster x1 x2 x3 x4 x5 x6 x7
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 92.3 92.2 0.0633 33.7 60.8 73.9 3.84
## 2 2 90.9 93.0 0.0588 79.0 88.5 86.6 3.62
## 3 3 92.1 95.5 0.0492 51.9 86.1 79.8 3.68
## 4 4 95.1 93.9 0.0467 65.6 52.7 86.3 3.90
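To see which kabupaten/kota fall in each cluster by name, a sketch using data_kota defined earlier:
# Sketch: list the district/city names belonging to each crisp cluster
split(data_kota$kota, f_cmeans_fix$cluster)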
X1: percentage of exclusively breastfed infants; X2: percentage of children receiving complementary feeding (MP-ASI); X3: percentage of children with complete immunisation; X4: percentage of children with health insurance; X5: percentage with access to adequate sanitation; X6: percentage with access to safe drinking water; X7: percentage of household nutrient consumption.
Annisa's suggested cluster labels:
Cluster 1: adequate basic healthcare; Cluster 2: high health risk; Cluster 3: good healthcare access but low exclusive breastfeeding; Cluster 4: emphasis on complementary feeding (MP-ASI) with low healthcare access.
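These descriptions can be attached to the data as labels for later tables and maps. A sketch (the object names are ours; whether each label matches the final cluster numbering should be checked against the cluster means above):
# Sketch: map cluster numbers to the suggested descriptive labels
cluster_labels <- c("1" = "Adequate basic healthcare",
                    "2" = "High health risk",
                    "3" = "Good healthcare access, low exclusive breastfeeding",
                    "4" = "Complementary feeding focus, low healthcare access")
data_cluster_labelled <- data.frame(kota = data_kota$kota,
                                    cluster = f_cmeans_fix$cluster,
                                    label = cluster_labels[as.character(f_cmeans_fix$cluster)])
head(data_cluster_labelled)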
data_cluster <- data
data_cluster$cluster <- c(f_cmeans_fix$cluster)
# Correlation heatmap (Spearman), now including the cluster assignment
plot_correlation(data = data_cluster[-1],
type = "all",
cor_args = list(method="spearman"),
ggtheme = theme_classic(),
theme_config = list(legend.position = "none",
axis.text.x=element_text(angle = 90)))
library(dplyr)
data_for_clustering |>
mutate(cluster=f_cmeans_fix$cluster) |>
group_by(cluster) |>
summarise(across(everything(),mean)) |>
arrange(desc(x7))
## # A tibble: 4 × 8
## cluster x1 x2 x3 x4 x5 x6 x7
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 4 95.1 93.9 0.0467 65.6 52.7 86.3 3.90
## 2 1 92.3 92.2 0.0633 33.7 60.8 73.9 3.84
## 3 3 92.1 95.5 0.0492 51.9 86.1 79.8 3.68
## 4 2 90.9 93.0 0.0588 79.0 88.5 86.6 3.62
data_for_clustering |>
mutate(cluster=f_cmeans_fix$cluster) |>
group_by(cluster) |>
summarise(across(everything(),sum))
## # A tibble: 4 × 8
## cluster x1 x2 x3 x4 x5 x6 x7
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 277. 277. 0.19 101. 182. 222. 11.5
## 2 2 727. 744. 0.47 632. 708. 693. 29.0
## 3 3 1197. 1241. 0.64 675. 1120. 1037. 47.8
## 4 4 285. 282. 0.14 197. 158. 259. 11.7
f_cmeans_fix
## $u
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 9.789946e-05 8.772709e-04 9.983994e-01 6.254218e-04
## 2 2.858063e-04 2.793190e-05 9.996720e-01 1.422355e-05
## 3 9.999765e-01 3.201489e-08 2.294834e-05 4.879820e-07
## 4 3.563596e-03 1.156217e-04 9.958544e-01 4.664176e-04
## 5 9.999945e-01 1.384224e-08 1.755966e-06 3.724079e-06
## 6 9.999998e-01 1.229097e-09 1.096418e-07 1.184759e-07
## 7 4.870223e-06 2.472285e-06 9.999917e-01 9.613304e-07
## 8 4.332586e-07 6.007304e-05 9.999392e-01 3.229373e-07
## 9 1.250283e-06 2.821751e-04 9.997149e-01 1.637070e-06
## 10 1.392111e-04 1.080750e-05 9.998403e-01 9.652632e-06
## 11 1.916028e-05 3.169220e-03 9.967959e-01 1.574779e-05
## 12 1.393972e-05 1.963055e-05 9.999654e-01 1.002003e-06
## 13 3.689103e-05 6.167035e-06 9.999559e-01 1.000516e-06
## 14 4.978894e-05 9.893638e-03 9.900254e-01 3.114943e-05
## 15 3.920929e-05 2.824535e-03 9.970468e-01 8.946920e-05
## 16 7.208105e-10 9.999997e-01 3.298154e-07 1.872132e-08
## 17 3.606909e-05 2.372780e-06 9.999607e-01 8.220383e-07
## 18 5.009431e-06 9.835843e-01 1.634203e-02 6.870594e-05
## 19 3.920701e-06 9.974987e-01 3.657127e-04 2.131685e-03
## 20 5.774673e-08 1.182002e-08 1.585602e-08 9.999999e-01
## 21 2.218388e-07 1.547941e-06 5.352742e-07 9.999977e-01
## 22 1.931842e-08 9.999943e-01 5.236320e-06 4.601079e-07
## 23 1.252774e-07 9.999324e-01 6.632882e-05 1.124285e-06
## 24 8.735545e-08 9.999300e-01 6.901695e-05 9.202931e-07
## 25 1.696532e-05 9.921733e-01 4.693065e-03 3.116621e-03
## 26 2.010210e-05 7.457159e-07 6.917615e-06 9.999722e-01
## 27 5.827956e-12 1.000000e+00 2.728804e-09 1.871638e-10
##
## $v
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 92.30961 92.21554 0.06332723 33.73207 60.82628 73.85286 3.843237
## Cluster 2 90.88895 93.01076 0.05889572 79.03480 88.53817 86.56778 3.621260
## Cluster 3 92.11427 95.49340 0.04907357 51.88424 86.13247 79.79236 3.679420
## Cluster 4 95.10359 93.93061 0.04669033 65.57987 52.69128 86.27682 3.896530
##
## $v0
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## Cluster 2 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## Cluster 3 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## Cluster 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
##
## $d
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 1007.98470 650.10463 159.12893 695.62533
## 2 750.44667 1194.85197 146.74479 1367.51656
## 3 87.97669 2775.20578 745.10136 1609.47720
## 4 536.64749 1065.26809 173.95128 805.95688
## 5 78.06845 2912.28438 1105.53331 951.19879
## 6 57.69870 3493.40328 1422.88823 1401.00587
## 7 894.56107 1024.47194 77.46767 1237.50008
## 8 1498.44311 558.79662 79.98250 1589.15428
## 9 1556.14746 526.43975 102.67795 1474.48025
## 10 757.80408 1263.41698 128.32332 1292.29773
## 11 1747.64209 629.13286 199.16435 1817.56066
## 12 1388.22476 1296.35540 148.35889 2350.37282
## 13 999.17761 1428.93204 129.72709 2055.80553
## 14 1877.13635 651.40515 259.29439 2061.73277
## 15 1250.45585 531.56547 164.43833 1060.25773
## 16 3310.86454 49.14798 972.41454 1725.99027
## 17 818.61142 1410.76071 105.80550 1743.91949
## 18 2107.94837 184.18546 417.98091 1248.58695
## 19 2682.19062 222.52557 1082.76540 761.05802
## 20 1480.18938 2032.82102 1916.83145 52.79864
## 21 1706.93561 1157.37419 1431.22985 79.69386
## 22 4143.71372 118.73659 1351.38422 2197.92134
## 23 3782.69573 157.53673 1079.03489 2438.93727
## 24 3532.63532 136.88721 930.17711 2205.82761
## 25 2072.21948 230.69099 673.06472 730.48505
## 26 770.44180 1488.92710 953.66719 88.59119
## 27 3342.08777 18.92871 977.07219 1669.84717
##
## $x
## x1 x2 x3 x4 x5 x6 x7
## 1 96.52 98.63 0.07 57.7483 77.28 83.93 3.60
## 2 92.39 96.83 0.03 50.5455 81.35 68.83 3.89
## 3 93.33 90.12 0.09 31.6522 69.67 73.93 3.86
## 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
## 5 92.59 98.39 0.03 36.6951 56.78 77.68 4.10
## 6 91.01 88.14 0.07 32.8277 56.01 69.94 3.57
## 7 97.59 97.96 0.00 48.6842 83.12 84.47 3.23
## 8 95.52 92.11 0.04 57.5531 90.85 78.22 3.41
## 9 98.72 96.00 0.00 58.5742 89.78 80.58 4.00
## 10 92.86 100.00 0.00 44.9799 81.43 85.93 3.72
## 11 85.07 91.67 0.04 55.1300 92.64 88.85 3.66
## 12 91.07 96.15 0.03 44.4327 95.67 80.38 3.72
## 13 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98
## 15 89.47 96.77 0.09 61.7424 81.73 73.53 3.86
## 16 88.79 95.59 0.03 82.6220 89.73 81.69 3.71
## 17 91.75 96.23 0.03 41.9298 86.62 82.19 3.45
## 18 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## 19 96.92 92.31 0.04 81.3657 75.15 87.52 3.90
## 20 95.00 94.29 0.00 66.0589 45.59 87.69 3.94
## 21 96.88 92.50 0.06 73.2582 55.68 83.71 3.78
## 22 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## 23 88.61 92.16 0.05 82.0101 98.30 79.68 3.80
## 24 94.44 89.13 0.03 76.8707 97.78 90.94 3.55
## 25 84.91 96.67 0.12 73.5336 76.25 87.10 3.56
## 26 93.44 95.00 0.08 57.4074 56.76 87.43 3.97
## 27 93.02 91.30 0.00 81.3433 90.18 88.42 3.58
##
## $cluster
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 3 3 1 3 1 1 3 3 3 3 3 3 3 3 3 2 3 2 2 4 4 2 2 2 2 4
## 27
## 2
##
## $csize
## 1 2 3 4
## 3 8 13 3
##
## $sumsqrs
## $sumsqrs$between.ss
## [1] 10887.16
##
## $sumsqrs$within.ss
## 1 2 3 4
## 223.7435 1118.6255 1875.0622 221.0829
##
## $sumsqrs$tot.within.ss
## [1] 3438.514
##
## $sumsqrs$tot.ss
## [1] 14325.68
##
##
## $k
## [1] 4
##
## $m
## [1] 1.2
##
## $iter
## [1] 11 63 10 76 57 9 78 10 75 21
##
## $best.start
## [1] 1
##
## $func.val
## [1] 3436.481 4538.821 3436.481 4538.821 4538.821 3436.481 4538.821 3436.481
## [9] 4538.821 3436.481
##
## $comp.time
## [1] 0.03 0.20 0.04 0.19 0.12 0.02 0.19 0.02 0.19 0.05
##
## $inpargs
## $inpargs$iter.max
## [1] 1000
##
## $inpargs$con.val
## [1] 1e-09
##
## $inpargs$dmetric
## [1] "sqeuclidean"
##
## $inpargs$alginitv
## [1] "kmpp"
##
## $inpargs$alginitu
## [1] "imembrand"
##
## $inpargs$fixcent
## [1] FALSE
##
## $inpargs$fixmemb
## [1] FALSE
##
## $inpargs$stand
## [1] FALSE
##
##
## $algorithm
## [1] "FCM"
##
## $call
## fcm(x = data_for_clustering, centers = 4, m = 1.2, nstart = 10)
##
## attr(,"class")
## [1] "ppclust"
summary(f_cmeans_fix)
## Summary for 'f_cmeans_fix'
##
## Number of data objects: 27
##
## Number of clusters: 4
##
## Crisp clustering vector:
## [1] 3 3 1 3 1 1 3 3 3 3 3 3 3 3 3 2 3 2 2 4 4 2 2 2 2 4 2
##
## Initial cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 87.50 96.43 0.07 42.3280 90.10 79.09 3.72
## Cluster 2 85.71 90.91 0.00 67.4912 86.31 90.38 3.18
## Cluster 3 94.59 96.00 0.20 86.8881 94.39 86.97 3.68
## Cluster 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60
##
## Final cluster prototypes:
## x1 x2 x3 x4 x5 x6 x7
## Cluster 1 92.30961 92.21554 0.06332723 33.73207 60.82628 73.85286 3.843237
## Cluster 2 90.88895 93.01076 0.05889573 79.03480 88.53817 86.56778 3.621260
## Cluster 3 92.11427 95.49340 0.04907357 51.88424 86.13247 79.79236 3.679420
## Cluster 4 95.10359 93.93061 0.04669033 65.57987 52.69128 86.27682 3.896530
##
## Distance between the final cluster prototypes
## Cluster 1 Cluster 2 Cluster 3
## Cluster 2 2984.6558
## Cluster 3 1015.9917 796.5153
## Cluster 4 1245.5660 1484.8049 1359.3573
##
## Difference between the initial and final cluster prototypes
## x1 x2 x3 x4 x5 x6
## Cluster 1 4.809615 -4.214456 -0.006672769 -8.59593 -29.273721 -5.237136
## Cluster 2 5.178952 2.100765 0.058895725 11.54360 2.228171 -3.812216
## Cluster 3 -2.475733 -0.506595 -0.150926430 -35.00386 -8.257531 -7.177640
## Cluster 4 3.753587 5.200605 -0.003309668 15.42557 -22.348723 5.226821
## x7
## Cluster 1 0.123236792
## Cluster 2 0.441260011
## Cluster 3 -0.000580016
## Cluster 4 0.296529676
##
## Root Mean Squared Deviations (RMSD): 28.89941
## Mean Absolute Deviation (MAD): 321.0421
##
## Membership degrees matrix (top and bottom 5 rows):
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 1 0.000097899 0.000877271 0.998399408 0.000625422
## 2 0.000285806 0.000027932 0.999672038 0.000014224
## 3 0.999976532 0.000000032 0.000022948 0.000000488
## 4 0.003563596 0.000115622 0.995854365 0.000466418
## 5 0.999994506 0.000000014 0.000001756 0.000003724
## ...
## Cluster 1 Cluster 2 Cluster 3 Cluster 4
## 23 1.2500e-07 0.999932422 0.000066329 0.000001124
## 24 8.7000e-08 0.999929975 0.000069017 0.000000920
## 25 1.6965e-05 0.992173348 0.004693065 0.003116621
## 26 2.0102e-05 0.000000746 0.000006918 0.999972235
## 27 0.0000e+00 0.999999997 0.000000003 0.000000000
##
## Descriptive statistics for the membership degrees by clusters
## Size Min Q1 Mean Median Q3 Max
## Cluster 1 3 0.9999765 0.9999855 0.9999903 0.9999945 0.9999971 0.9999998
## Cluster 2 8 0.9835843 0.9961673 0.9966391 0.9999312 0.9999956 1.0000000
## Cluster 3 13 0.9900254 0.9970468 0.9982432 0.9997149 0.9999559 0.9999917
## Cluster 4 3 0.9999722 0.9999850 0.9999899 0.9999977 0.9999988 0.9999999
##
## Dunn's Fuzziness Coefficients:
## dunn_coeff normalized
## 0.9963459 0.9951279
##
## Within cluster sum of squares by cluster:
## 1 2 3 4
## 223.7435 1118.6255 1875.0622 221.0829
## (between_SS / total_SS = 76%)
##
## Available components:
## [1] "u" "v" "v0" "d" "x"
## [6] "cluster" "csize" "sumsqrs" "k" "m"
## [11] "iter" "best.start" "func.val" "comp.time" "inpargs"
## [16] "algorithm" "call"
# Load the libraries
library(sf)
## Warning: package 'sf' was built under R version 4.3.3
## Linking to GEOS 3.11.2, GDAL 3.8.2, PROJ 9.3.1; sf_use_s2() is TRUE
library(ggplot2)
library(dplyr)
data_cluster
## R102 x1 x2 x3 x4 x5 x6 x7 cluster
## 1 1 96.52 98.63 0.07 57.7483 77.28 83.93 3.60 3
## 2 2 92.39 96.83 0.03 50.5455 81.35 68.83 3.89 3
## 3 3 93.33 90.12 0.09 31.6522 69.67 73.93 3.86 1
## 4 4 91.35 88.73 0.05 50.1543 75.04 81.05 3.60 3
## 5 5 92.59 98.39 0.03 36.6951 56.78 77.68 4.10 1
## 6 6 91.01 88.14 0.07 32.8277 56.01 69.94 3.57 1
## 7 7 97.59 97.96 0.00 48.6842 83.12 84.47 3.23 3
## 8 8 95.52 92.11 0.04 57.5531 90.85 78.22 3.41 3
## 9 9 98.72 96.00 0.00 58.5742 89.78 80.58 4.00 3
## 10 10 92.86 100.00 0.00 44.9799 81.43 85.93 3.72 3
## 11 11 85.07 91.67 0.04 55.1300 92.64 88.85 3.66 3
## 12 12 91.07 96.15 0.03 44.4327 95.67 80.38 3.72 3
## 13 13 87.50 96.43 0.07 42.3280 90.10 79.09 3.72 3
## 14 14 87.65 93.88 0.19 60.6982 94.16 70.07 3.98 3
## 15 15 89.47 96.77 0.09 61.7424 81.73 73.53 3.86 3
## 16 16 88.79 95.59 0.03 82.6220 89.73 81.69 3.71 2
## 17 17 91.75 96.23 0.03 41.9298 86.62 82.19 3.45 3
## 18 18 85.71 90.91 0.00 67.4912 86.31 90.38 3.18 2
## 19 71 96.92 92.31 0.04 81.3657 75.15 87.52 3.90 2
## 20 72 95.00 94.29 0.00 66.0589 45.59 87.69 3.94 4
## 21 73 96.88 92.50 0.06 73.2582 55.68 83.71 3.78 4
## 22 74 94.59 96.00 0.20 86.8881 94.39 86.97 3.68 2
## 23 75 88.61 92.16 0.05 82.0101 98.30 79.68 3.80 2
## 24 76 94.44 89.13 0.03 76.8707 97.78 90.94 3.55 2
## 25 77 84.91 96.67 0.12 73.5336 76.25 87.10 3.56 2
## 26 78 93.44 95.00 0.08 57.4074 56.76 87.43 3.97 4
## 27 79 93.02 91.30 0.00 81.3433 90.18 88.42 3.58 2
# Load the CSV data
data_clustered <- read.csv("Summary_Cluster tableau.csv", sep = ";")
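# Note: the next line assumes the rows of the CSV are in the same order as data_cluster (one row per kabupaten/kota)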
data_clustered$Cluster <- data_cluster$cluster
# Load shapefile of the regions (replace with the path to your shapefile)
shapefile <- st_read("RBI_50K_2023_Jawa Barat.shp")
## Reading layer `RBI_50K_2023_Jawa Barat' from data source
## `C:\Users\Administrator\Documents\kuliah\Akademik\Kampus\Kuliah\S2-Semester 3\Pemodelan Klasifikasi\RBI_50K_2023_Jawa Barat.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 25 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY, XYZ
## Bounding box: xmin: 106.3703 ymin: -7.82099 xmax: 108.8468 ymax: -5.806538
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
# Join the shapefile with the cluster data
data_merged <- shapefile %>%
left_join(data_clustered, by = c("NAMOBJ" = "Kabupaten.kota"))
# Inspect the join result
head(data_merged)
## Simple feature collection with 6 features and 29 fields
## Geometry type: MULTIPOLYGON
## Dimension: XYZ
## Bounding box: xmin: 106.4012 ymin: -7.574692 xmax: 108.7234 ymax: -5.918425
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
## NAMOBJ FCODE REMARK METADATA SRS_ID KDBBPS
## 1 Bandung BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 2 Bandung Barat BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 3 Bekasi BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 4 Bogor BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 5 Ciamis BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## 6 Cianjur BA03050040 <NA> TASWIL5000020230907KABKOTA 4326 <NA>
## KDCBPS KDCPUM KDEBPS KDEPUM KDPBPS KDPKAB KDPPUM LUASWH TIPADM WADMKC
## 1 <NA> <NA> <NA> <NA> <NA> 32.04 32 1740.843 4 <NA>
## 2 <NA> <NA> <NA> <NA> <NA> 32.17 32 1283.439 4 <NA>
## 3 <NA> <NA> <NA> <NA> <NA> 32.16 32 1251.054 4 <NA>
## 4 <NA> <NA> <NA> <NA> <NA> 32.01 32 2991.778 4 <NA>
## 5 <NA> <NA> <NA> <NA> <NA> 32.07 32 1595.937 4 <NA>
## 6 <NA> <NA> <NA> <NA> <NA> 32.03 32 3631.915 4 <NA>
## WADMKD WADMKK WADMPR WIADKC WIADKK WIADPR WIADKD SHAPE_Leng
## 1 <NA> Bandung Jawa Barat <NA> <NA> <NA> 0 3.242820
## 2 <NA> Bandung Barat Jawa Barat <NA> Bandung <NA> 0 2.564628
## 3 <NA> Bekasi Jawa Barat <NA> <NA> <NA> 0 3.135190
## 4 <NA> Bogor Jawa Barat <NA> <NA> <NA> 0 4.526551
## 5 <NA> Ciamis Jawa Barat <NA> <NA> <NA> 0 3.004328
## 6 <NA> Cianjur Jawa Barat <NA> <NA> <NA> 0 4.398638
## SHAPE_Area R102 Cluster Latitude Longitude geometry
## 1 0.1424909 32.04 3 -6.914744 107.6098 MULTIPOLYGON Z (((107.9147 ...
## 2 0.1050062 32.17 3 -6.854496 107.4726 MULTIPOLYGON Z (((107.5938 ...
## 3 0.1022227 32.16 2 -6.236853 107.0055 MULTIPOLYGON Z (((107.0159 ...
## 4 0.2446141 32.01 3 -6.595038 106.8166 MULTIPOLYGON Z (((106.991 -...
## 5 0.1306842 32.07 3 -7.332494 108.3538 MULTIPOLYGON Z (((108.3857 ...
## 6 0.2973014 32.03 1 -6.820958 107.1424 MULTIPOLYGON Z (((107.3021 ...
data_merged <- st_make_valid(data_merged)
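A quick sanity check (sketch) that every district in the shapefile received a cluster label after the join:
# Sketch: count districts whose NAMOBJ did not match any Kabupaten.kota in the cluster data
sum(is.na(data_merged$Cluster))
# data_merged$NAMOBJ[is.na(data_merged$Cluster)]  # inspect any unmatched names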
# Colours for each cluster
custom_colors <- c("1" = "#3d3e9f", "2" = "#5c5fd4", "3" = "#8a8aff", "4" = "#b7b7ff")
# Plot with manually specified colours
tes <- ggplot(data_merged) +
geom_sf(aes(fill = as.factor(Cluster))) +
scale_fill_manual(values = custom_colors) +
labs(title = "Clustering of Kabupaten/Kota in Jawa Barat",
fill = "Cluster") +
theme_cowplot()
library(sf)
library(ggplot2)
library(dplyr)
# The shapefile has already been read and joined with the cluster data
# data_merged is the result of joining the shapefile with the cluster assignments
ggplot(data_merged) +
geom_sf(aes(fill = as.factor(Cluster))) +
scale_fill_manual(values = custom_colors) +
labs(title = "Clustering of Kabupaten/Kota in Jawa Barat",
fill = "Cluster") +
theme_cowplot()
ggsave("peta_jawa_barat_kluster_custom_colors.png",tes, width = 10, height = 8)