LIBRARY

library(foreign)

## Warning: package 'foreign' was built under R version 4.3.3

library(psych)

## Warning: package 'psych' was built under R version 4.3.2

library(GPArotation)

## Warning: package 'GPArotation' was built under R version 4.3.3

## 
## Attaching package: 'GPArotation'

## The following objects are masked from 'package:psych':
## 
##     equamax, varimin

library(clValid)

## Warning: package 'clValid' was built under R version 4.3.3

## Loading required package: cluster

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.3

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

library(cluster)
library(factoextra)

## Warning: package 'factoextra' was built under R version 4.3.3

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

library(tidyverse)

## Warning: package 'purrr' was built under R version 4.3.3

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.0

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%()   masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(car)

## Warning: package 'car' was built under R version 4.3.2

## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
## 
## The following object is masked from 'package:psych':
## 
##     logit

library(kernlab)

## 
## Attaching package: 'kernlab'
## 
## The following object is masked from 'package:purrr':
## 
##     cross
## 
## The following object is masked from 'package:ggplot2':
## 
##     alpha
## 
## The following object is masked from 'package:psych':
## 
##     alpha

library(DataExplorer)
library(Rtsne)

## Warning: package 'Rtsne' was built under R version 4.3.3

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.3.3

library(tictoc)

## 
## Attaching package: 'tictoc'
## 
## The following object is masked from 'package:kernlab':
## 
##     size

library(dplyr)

DATA

# Read the "blok41" DBF file of the KP (Konsumsi Pengeluaran) module and
# preview its first rows.
data_kp_41 <- foreign::read.dbf(
  file = "C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/SUSENAS/Jawa Barat/2023/dbf/Modul KP (Konsumsi Pengeluaran)/32_ssn_202303_kp_blok41.dbf"
)
head(data_kp_41)

##     URUT R101 R102 R105 R301 KODE KLP   COICOP B41K5 B41K6 B41K7 B41K8 B41K9
## 1 500001   32    7    2    4    1   0     <NA>   0.0 92900     0     0   0.0
## 2 500001   32    7    2    4    2   1 01111001   7.0 82900     0     0   7.0
## 3 500001   32    7    2    4    6   1 01115005   0.5 10000     0     0   0.5
## 4 500001   32    7    2    4    8   0     <NA>   0.0     0     0 18000   0.0
## 5 500001   32    7    2    4    9   8 01178001   0.0     0     3  6000   3.0
## 6 500001   32    7    2    4   10   8 01178002   0.0     0     1  6000   1.0
##   B41K10  KALORI PROTEIN  LEMAK   KARBO     WERT    WEIND   PSU    SSU   WI1
## 1  92900 27019.0  638.25 106.50 5814.50 454.8891 1819.556 12448 123442 12435
## 2  82900 25354.0  593.25 101.50 5428.50 454.8891 1819.556 12448 123442 12435
## 3  10000  1665.0   45.00   5.00  386.00 454.8891 1819.556 12448 123442 12435
## 4  18000  6314.4   52.78  14.69 1489.39 454.8891 1819.556 12448 123442 12435
## 5   6000  3927.0   25.50   7.65  938.40 454.8891 1819.556 12448 123442 12435
## 6   6000  1252.0   11.78   3.26  293.68 454.8891 1819.556 12448 123442 12435
##      WI2
## 1 123427
## 2 123427
## 3 123427
## 4 123427
## 5 123427
## 6 123427

# Read the "blok43" DBF file of the KP (Konsumsi Pengeluaran) module and
# preview its first rows.
data_kp_43 <- foreign::read.dbf(
  file = "C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/SUSENAS/Jawa Barat/2023/dbf/Modul KP (Konsumsi Pengeluaran)/32_ssn_202303_kp_blok43.dbf"
)
head(data_kp_43)

##     URUT R101 R102 R105 R301    FOOD    NONFOOD   EXPEND     KAPITA KALORI_KAP
## 1 500001   32    7    2    4 2660400  2304033.3  4964433  1241108.3   2365.173
## 2 500002   32   72    1    2 1108714   525166.7  1633881   816940.5   2611.639
## 3 500003   32    6    2    3 2413886  1398333.3  3812219  1270739.7   2526.510
## 4 500004   32   72    1    7 7770000  4313333.3 12083333  1726190.5   3655.855
## 5 500005   32   77    1    3 4932557 46219750.0 51152307 17050769.0   2330.988
## 6 500006   32   77    1    2 3272143 11400716.7 14672860  7336429.8   2330.760
##   PROTE_KAP LEMAK_KAP KARBO_KAP       WERT      WEIND   PSU    SSU   WI1    WI2
## 1  67.13024  43.22239  352.9857 454.889101 1819.55641 12448 123442 12435 123427
## 2  69.09189  30.11824  471.9977 172.376821  344.75364 31373 311039 31360 311024
## 3  68.74262  58.51762  360.4562 241.578661  724.73598 12092 119908 12079 119893
## 4 141.50262 119.46043  454.9395  93.673563  655.71494 31135 308689 31122 308674
## 5  78.53335  53.14161  254.8461 122.217556  366.65267 33988 336798 33975 336783
## 6  68.15351  60.91893  317.6763   7.638085   15.27617 34062 337531 34049 337516

# Join the two tables on the shared "URUT" key. Columns other than the key
# that exist in both tables are kept twice and receive the ".x" / ".y"
# suffixes added by merge().
data <- merge(x = data_kp_41, y = data_kp_43, by = "URUT")
# Preview the merged table
head(data)

##     URUT R101.x R102.x R105.x R301.x KODE KLP   COICOP B41K5 B41K6 B41K7 B41K8
## 1 500001     32      7      2      4    1   0     <NA>   0.0 92900     0     0
## 2 500001     32      7      2      4    2   1 01111001   7.0 82900     0     0
## 3 500001     32      7      2      4    6   1 01115005   0.5 10000     0     0
## 4 500001     32      7      2      4    8   0     <NA>   0.0     0     0 18000
## 5 500001     32      7      2      4    9   8 01178001   0.0     0     3  6000
## 6 500001     32      7      2      4   10   8 01178002   0.0     0     1  6000
##   B41K9 B41K10  KALORI PROTEIN  LEMAK   KARBO   WERT.x  WEIND.x PSU.x  SSU.x
## 1   0.0  92900 27019.0  638.25 106.50 5814.50 454.8891 1819.556 12448 123442
## 2   7.0  82900 25354.0  593.25 101.50 5428.50 454.8891 1819.556 12448 123442
## 3   0.5  10000  1665.0   45.00   5.00  386.00 454.8891 1819.556 12448 123442
## 4   0.0  18000  6314.4   52.78  14.69 1489.39 454.8891 1819.556 12448 123442
## 5   3.0   6000  3927.0   25.50   7.65  938.40 454.8891 1819.556 12448 123442
## 6   1.0   6000  1252.0   11.78   3.26  293.68 454.8891 1819.556 12448 123442
##   WI1.x  WI2.x R101.y R102.y R105.y R301.y    FOOD NONFOOD  EXPEND  KAPITA
## 1 12435 123427     32      7      2      4 2660400 2304033 4964433 1241108
## 2 12435 123427     32      7      2      4 2660400 2304033 4964433 1241108
## 3 12435 123427     32      7      2      4 2660400 2304033 4964433 1241108
## 4 12435 123427     32      7      2      4 2660400 2304033 4964433 1241108
## 5 12435 123427     32      7      2      4 2660400 2304033 4964433 1241108
## 6 12435 123427     32      7      2      4 2660400 2304033 4964433 1241108
##   KALORI_KAP PROTE_KAP LEMAK_KAP KARBO_KAP   WERT.y  WEIND.y PSU.y  SSU.y WI1.y
## 1   2365.173  67.13024  43.22239  352.9857 454.8891 1819.556 12448 123442 12435
## 2   2365.173  67.13024  43.22239  352.9857 454.8891 1819.556 12448 123442 12435
## 3   2365.173  67.13024  43.22239  352.9857 454.8891 1819.556 12448 123442 12435
## 4   2365.173  67.13024  43.22239  352.9857 454.8891 1819.556 12448 123442 12435
## 5   2365.173  67.13024  43.22239  352.9857 454.8891 1819.556 12448 123442 12435
## 6   2365.173  67.13024  43.22239  352.9857 454.8891 1819.556 12448 123442 12435
##    WI2.y
## 1 123427
## 2 123427
## 3 123427
## 4 123427
## 5 123427
## 6 123427

# Keep only the regency/city code (R102.x), the three B41K* columns and the
# nutrient columns.
data <- dplyr::select(
  data,
  R102.x, B41K5, B41K9, B41K7, KALORI, PROTEIN, LEMAK, KARBO
)
head(data)

##   R102.x B41K5 B41K9 B41K7  KALORI PROTEIN  LEMAK   KARBO
## 1      7   0.0   0.0     0 27019.0  638.25 106.50 5814.50
## 2      7   7.0   7.0     0 25354.0  593.25 101.50 5428.50
## 3      7   0.5   0.5     0  1665.0   45.00   5.00  386.00
## 4      7   0.0   0.0     0  6314.4   52.78  14.69 1489.39
## 5      7   0.0   3.0     3  3927.0   25.50   7.65  938.40
## 6      7   0.0   1.0     1  1252.0   11.78   3.26  293.68

MENGUBAH KODE KABUPATEN/KOTA

# Lookup table from the R102 code (stored as a character name) to the
# regency/city name: codes 1-18 followed by codes 71-79.
labels <- setNames(
  c(
    "Bogor", "Sukabumi", "Cianjur", "Bandung", "Garut", "Tasikmalaya",
    "Ciamis", "Kuningan", "Cirebon", "Majalengka", "Sumedang", "Indramayu",
    "Subang", "Purwakarta", "Karawang", "Bekasi", "Bandung Barat",
    "Pangandaran",
    "Kota Bogor", "Kota Sukabumi", "Kota Bandung", "Kota Cirebon",
    "Kota Bekasi", "Kota Depok", "Kota Cimahi", "Kota Tasikmalaya",
    "Kota Banjar"
  ),
  as.character(c(1:18, 71:79))
)

# Translate every R102.x code into its name; codes that are missing from the
# lookup table become NA.
data$KOTA_KAB <- labels[as.character(data$R102.x)]

# Show the code next to the name it was mapped to
head(data[c("R102.x", "KOTA_KAB")])

##   R102.x KOTA_KAB
## 1      7   Ciamis
## 2      7   Ciamis
## 3      7   Ciamis
## 4      7   Ciamis
## 5      7   Ciamis
## 6      7   Ciamis

MENGUBAH NAMA KOLOM

# Give the three B41K* columns descriptive names (new name = old name).
data <- dplyr::rename(
  data,
  KONSUMSI_PEMBELIAN = B41K5,
  KONSUMSI_PRODUKSI = B41K7,
  KONSUMSI = B41K9
)

MEMANGGIL DATA DENGAN KOLOM TERPILIH

# Analysis table: the regency/city name followed by the consumption and
# nutrient columns, in this order.
data_select <- data[c(
  "KOTA_KAB",
  "KONSUMSI_PEMBELIAN", "KONSUMSI_PRODUKSI", "KONSUMSI",
  "KALORI", "PROTEIN", "LEMAK", "KARBO"
)]
head(data_select)

##   KOTA_KAB KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI  KALORI PROTEIN  LEMAK
## 1   Ciamis                0.0                 0      0.0 27019.0  638.25 106.50
## 2   Ciamis                7.0                 0      7.0 25354.0  593.25 101.50
## 3   Ciamis                0.5                 0      0.5  1665.0   45.00   5.00
## 4   Ciamis                0.0                 0      0.0  6314.4   52.78  14.69
## 5   Ciamis                0.0                 3      3.0  3927.0   25.50   7.65
## 6   Ciamis                0.0                 1      1.0  1252.0   11.78   3.26
##     KARBO
## 1 5814.50
## 2 5428.50
## 3  386.00
## 4 1489.39
## 5  938.40
## 6  293.68

# Overview plot of data_select produced by DataExplorer::plot_intro().
DataExplorer::plot_intro(data = data_select, ggtheme = theme_pubr())

MENYATUKAN DATA BERDASARKAN KABUPATEN/KOTA

# Collapse the rows to one row per regency/city by taking the mean of every
# consumption and nutrient column.
data_kp <- aggregate(
  cbind(
    KONSUMSI_PEMBELIAN, KONSUMSI_PRODUKSI, KONSUMSI,
    KALORI, PROTEIN, LEMAK, KARBO
  ) ~ KOTA_KAB,
  data = data_select,
  FUN = mean,
  na.rm = TRUE
)
head(data_kp)

##        KOTA_KAB KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI   KALORI  PROTEIN
## 1       Bandung           5.633103         0.3132981 5.946401 1804.658 51.40107
## 2 Bandung Barat           4.887103         0.3175596 5.204663 1735.882 50.24038
## 3        Bekasi           7.079314         0.1958701 7.275185 1870.661 57.00926
## 4         Bogor           6.660440         0.2398892 6.900329 1847.940 53.84075
## 5        Ciamis           5.043323         0.5632651 5.606588 1652.160 48.62661
## 6       Cianjur           5.658940         0.7627645 6.421704 2010.038 57.51819
##      LEMAK    KARBO
## 1 44.71250 253.2761
## 2 41.52217 249.6948
## 3 43.22562 259.3475
## 4 41.64808 264.3559
## 5 35.52076 249.9251
## 6 42.15090 307.3939

MENYIMPAN DATA

# Save the per-regency/city averages as CSV (without a row-name column) ...
utils::write.csv(
  x = data_kp,
  file = "C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/data_kp.csv",
  row.names = FALSE
)

# ... and as an Excel workbook.
library(writexl)
write_xlsx(
  x = data_kp,
  path = "C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/data_kp.xlsx"
)

BOX PLOT

# Boxplots of the three macronutrient columns, side by side (1 row, 3 columns).
# par() returns the previous settings, which are restored afterwards instead
# of assuming that the device started with a 1 x 1 layout.
old_par <- par(mfrow = c(1, 3))

# Boxplot of KARBO
boxplot(data_kp$KARBO, main = "Boxplot Karbo", ylab = "KARBO", col = "lightblue")

# Boxplot of PROTEIN
boxplot(data_kp$PROTEIN, main = "Boxplot Protein", ylab = "PROTEIN", col = "lightgreen")

# Boxplot of LEMAK
boxplot(data_kp$LEMAK, main = "Boxplot Lemak", ylab = "LEMAK", col = "lightcoral")

# Restore the graphics settings that were active before this block
par(old_par)

# Boxplots of the two consumption columns, side by side (1 row, 2 columns).
# par() returns the previous settings, which are restored afterwards instead
# of assuming that the device started with a 1 x 1 layout.
old_par <- par(mfrow = c(1, 2))

# Boxplot of KONSUMSI_PEMBELIAN
boxplot(data_kp$KONSUMSI_PEMBELIAN, main = "Boxplot Konsumsi Pembelian", ylab = "KONSUMSI PEMBELIAN", col = "lightblue")

# Boxplot of KONSUMSI_PRODUKSI
boxplot(data_kp$KONSUMSI_PRODUKSI, main = "Boxplot Konsumsi Produksi", ylab = "KONSUMSI PRODUKSI", col = "lightgreen")

# Restore the graphics settings that were active before this block
par(old_par)

LINE PLOT

# Line-and-point plot of KALORI per regency/city; group = 1 joins all
# categories with a single line, and the x labels are rotated to stay legible.
ggplot(data = data_kp, mapping = aes(x = KOTA_KAB, y = KALORI, group = 1)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Line Plot Kalori Berdasarkan Kota/Kabupaten",
    x = "Kota/Kabupaten",
    y = "Kalori"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Line-and-point plot of KONSUMSI per regency/city; group = 1 joins all
# categories with a single line, and the x labels are rotated to stay legible.
ggplot(data = data_kp, mapping = aes(x = KOTA_KAB, y = KONSUMSI, group = 1)) +
  geom_line() +
  geom_point() +
  labs(
    title = "Line Plot Konsumsi Berdasarkan Kota/Kabupaten",
    x = "Kota/Kabupaten",
    y = "Konsumsi"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

DENSITY PLOT

# Numeric-only copy of data_kp: drop the regency/city name column.
data_viz <- dplyr::select(data_kp, -KOTA_KAB)
head(data_viz)

##   KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI   KALORI  PROTEIN    LEMAK
## 1           5.633103         0.3132981 5.946401 1804.658 51.40107 44.71250
## 2           4.887103         0.3175596 5.204663 1735.882 50.24038 41.52217
## 3           7.079314         0.1958701 7.275185 1870.661 57.00926 43.22562
## 4           6.660440         0.2398892 6.900329 1847.940 53.84075 41.64808
## 5           5.043323         0.5632651 5.606588 1652.160 48.62661 35.52076
## 6           5.658940         0.7627645 6.421704 2010.038 57.51819 42.15090
##      KARBO
## 1 253.2761
## 2 249.6948
## 3 259.3475
## 4 264.3559
## 5 249.9251
## 6 307.3939

# Density plot for every column of data_viz, laid out 2 rows x 1 column per
# page.
DataExplorer::plot_density(
  data = data_viz,
  binary_as_factor = TRUE,
  geom_density_args = list(fill = "#03A9F4"),
  nrow = 2,
  ncol = 1,
  ggtheme = theme_pubr(base_size = 9)
)

STANDARISASI DATA

# Standardize every column (subtract the column mean, divide by the column
# standard deviation); the result is a numeric matrix.
data_standarisasi <- scale(x = data_viz, center = TRUE, scale = TRUE)
head(data_standarisasi)

##      KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI   KONSUMSI       KALORI    PROTEIN
## [1,]         -0.2830039       -0.08147206 -0.2877741 -0.376017853 -1.0923781
## [2,]         -1.1106864       -0.06078817 -1.0729320 -0.968598656 -1.4013782
## [3,]          1.3215599       -0.65143767  1.1187934  0.192671985  0.4006416
## [4,]          0.8568207       -0.43778005  0.7219946 -0.003091168 -0.4428817
## [5,]         -0.9373618        1.13180349 -0.6474792 -1.689964365 -1.8309978
## [6,]         -0.2543380        2.10012193  0.2153523  1.393570815  0.5361310
##           LEMAK        KARBO
## [1,]  0.2746234 -0.497881168
## [2,] -0.5472572 -0.659347804
## [3,] -0.1084205 -0.224141691
## [4,] -0.5148216  0.001671892
## [5,] -2.0933192 -0.648967278
## [6,] -0.3852864  1.942112479

# Per-column summary of the standardized data.
summary(object = data_standarisasi)

##  KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI    KONSUMSI           KALORI        
##  Min.   :-1.1789    Min.   :-1.2127   Min.   :-1.1882   Min.   :-1.92917  
##  1st Qu.:-0.7038    1st Qu.:-0.8503   1st Qu.:-0.8099   1st Qu.:-0.55791  
##  Median :-0.2547    Median :-0.3802   Median :-0.1978   Median :-0.04241  
##  Mean   : 0.0000    Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.00000  
##  3rd Qu.: 0.4866    3rd Qu.: 0.5257   3rd Qu.: 0.6152   3rd Qu.: 0.68559  
##  Max.   : 3.0416    Max.   : 2.1001   Max.   : 2.9950   Max.   : 1.94090  
##     PROTEIN            LEMAK              KARBO         
##  Min.   :-1.8310   Min.   :-2.44654   Min.   :-1.80039  
##  1st Qu.:-0.6137   1st Qu.:-0.50645   1st Qu.:-0.68965  
##  Median : 0.3747   Median :-0.06009   Median :-0.06236  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.7929   3rd Qu.: 0.58100   3rd Qu.: 0.43732  
##  Max.   : 1.4792   Max.   : 1.65880   Max.   : 1.94211

KORELASI

library(GGally)

## Warning: package 'GGally' was built under R version 4.3.3

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

# Pearson correlation heat map (tiles labelled with the coefficients) of the
# standardized variables.
ggcorr(
  data = data_standarisasi,
  method = c("everything", "pearson"),
  geom = "tile",
  label = TRUE
)

COMPLETE LINKAGE

library(factoextra)
library(cluster)

# Linkage method(s) to evaluate; one silhouette curve is drawn per method.
metode_agg <- "complete"

# For each linkage method, plot the average silhouette width for up to
# k.max = 25 clusters. The extra arguments are forwarded by fviz_nbclust()
# to hcut(); its argument is spelled `hc_func` in full here, because the
# previous `hc_fun` only worked through partial argument matching.
map(metode_agg, function(i) {
  fviz_nbclust(
    x = data_standarisasi,
    FUNcluster = hcut,
    method = "silhouette",
    hc_method = i,
    hc_func = "hclust",
    k.max = 25
  ) +
    ggtitle(str_c("Optimal number of clusters based on HC with ", i, " linkage"))
})

## [[1]]

JARAK MATRIKS

# Pairwise Euclidean distances between the standardized rows, followed by
# hierarchical clustering with complete linkage.
dist_matrix <- dist(data_standarisasi, method = "euclidean")
hclust_complete <- hclust(d = dist_matrix, method = "complete")

# Show the full distance matrix
print(dist_matrix)

##            1         2         3         4         5         6         7
## 2  1.5652960                                                            
## 3  2.7662411 4.0057654                                                  
## 4  1.9696408 3.0860798 1.1768984                                        
## 5  3.1514446 2.1745536 4.9037974 3.9045532                              
## 6  4.1455815 4.8156161 4.1352832 3.8240719 5.1856724                    
## 7  3.2416003 4.4503407 2.2428108 2.7311688 5.9094402 3.7383672          
## 8  4.2513586 5.5055915 3.6278715 3.8447724 6.5437983 2.5563195 2.5440238
## 9  5.0760410 6.2865193 2.8633810 3.5917222 6.5228126 5.5713795 4.5228278
## 10 3.6321045 4.7364122 1.6395431 2.1257410 5.5509477 3.2690750 1.9725422
## 11 2.0063889 2.9346182 3.0073168 2.9514025 4.7266225 5.2863941 2.9095677
## 12 1.8619709 1.4926504 3.6771607 2.8228668 2.7671005 3.6247584 3.6998128
## 13 2.5504516 3.0368474 3.0909522 3.0633093 4.7361690 4.6393328 2.4080045
## 14 2.4899250 3.3804177 2.9776376 3.0112351 5.2329078 4.3852915 1.7323080
## 15 2.3064338 2.1244856 4.4082061 3.8995541 3.9478168 6.2031547 4.6052661
## 16 2.0021501 2.1864987 3.1552942 2.8029054 3.9584371 4.7286745 2.9644748
## 17 2.0377854 1.9463789 3.5375754 3.0967167 3.6593008 5.6447608 3.9026701
## 18 2.3653509 3.7123215 2.4290319 2.5904225 5.3356691 3.8979953 1.3593605
## 19 3.2516528 3.8631232 3.6029326 3.4927805 5.5453154 3.8441973 2.0187473
## 20 2.2360350 2.8960578 2.6691260 1.8751254 2.9263655 4.6958365 4.3811781
## 21 1.1269049 2.2309010 2.3683817 1.7567556 3.8418616 3.6075192 2.3075144
## 22 3.5120096 3.0350734 4.5784974 3.6610998 1.3689256 5.3718872 6.0374111
## 23 4.9670463 6.1462885 3.0274991 3.5309035 6.7739568 3.2117070 2.9977237
## 24 2.1684532 2.6013485 2.6704547 2.2128665 3.9555337 3.5298814 2.4146974
## 25 3.9180864 4.5100777 4.0161676 3.7640296 5.0895636 0.9011036 3.3302798
## 26 2.1764677 2.7249542 3.6576348 3.0185536 3.2201335 2.9064362 3.8820746
## 27 2.1889476 2.9147621 2.1998526 1.3646591 3.2119804 2.9027071 3.2762994
##            8         9        10        11        12        13        14
## 2                                                                       
## 3                                                                       
## 4                                                                       
## 5                                                                       
## 6                                                                       
## 7                                                                       
## 8                                                                       
## 9  4.9068234                                                            
## 10 2.9148461 3.5007744                                                  
## 11 4.6177351 5.1937636 4.0368658                                        
## 12 4.6046173 6.1413213 3.8946870 3.2512277                              
## 13 4.4338772 5.6422228 3.5161286 1.9044467 2.6187126                    
## 14 3.6831948 5.5587705 3.2916452 1.7907433 2.8990222 1.1687947          
## 15 6.1060404 6.5841509 5.4743926 1.9652888 3.1782272 2.9195870 3.1748751
## 16 4.8251807 5.7699315 3.7424401 1.8728373 2.0034884 0.9576744 1.6936053
## 17 5.7038572 5.8902844 4.5482797 1.7950176 2.6153553 2.0233410 2.6315676
## 18 2.7205731 4.6823249 2.8093039 1.9674486 3.2889370 2.2439964 1.3558888
## 19 3.5446256 6.2529065 3.2113669 3.2412641 2.8490024 2.0742091 1.5784338
## 20 5.0800579 3.8873576 3.7980254 3.5303520 3.3201778 4.1560939 4.2950286
## 21 3.5710633 5.0045773 2.8380791 2.0952986 1.7488599 2.0026017 1.7384298
## 22 6.6317732 5.7019096 5.3301219 5.0294748 3.5513824 5.1966894 5.6677285
## 23 2.5526589 3.7284645 1.7182840 5.4330640 5.2008149 4.9866014 4.5887235
## 24 3.9577647 5.3612537 2.6750753 2.8231230 1.5960546 1.7174880 1.9692775
## 25 2.6193607 5.7235926 3.1708503 4.8564454 3.2274748 3.9894164 3.8181445
## 26 3.7121560 5.2554899 3.9855215 3.5220165 2.3881026 3.6466202 3.5991756
## 27 3.7667496 4.0918288 2.4528237 3.6390383 2.3297086 3.4216451 3.5108258
##           15        16        17        18        19        20        21
## 2                                                                       
## 3                                                                       
## 4                                                                       
## 5                                                                       
## 6                                                                       
## 7                                                                       
## 8                                                                       
## 9                                                                       
## 10                                                                      
## 11                                                                      
## 12                                                                      
## 13                                                                      
## 14                                                                      
## 15                                                                      
## 16 2.3049422                                                            
## 17 1.3829713 1.3011564                                                  
## 18 3.6415284 2.5994950 3.2812839                                        
## 19 4.2593727 2.4192131 3.6305951 2.3123464                              
## 20 3.8067849 3.6437228 3.3499851 3.8349916 4.9899536                    
## 21 2.9340052 1.6972241 2.3669882 1.7322091 2.2139373 2.8682820          
## 22 4.5626589 4.4891166 4.1289739 5.5626499 6.0725527 2.3202614 4.1912019
## 23 6.9641712 5.3089163 6.1499975 3.8483911 4.3300144 4.9599253 4.2169388
## 24 3.5428698 1.5037904 2.5922614 2.4887180 1.8191838 3.4979416 1.3180119
## 25 5.7883464 4.1605795 5.1787147 3.5434109 3.2249942 4.7612787 3.2838710
## 26 3.9261641 3.4026750 3.7586700 3.1428549 3.9748359 2.9391470 2.4819752
## 27 4.3121184 3.1095817 3.5549618 3.1403202 3.6729087 1.9996394 2.0611859
##           22        23        24        25        26
## 2                                                   
## 3                                                   
## 4                                                   
## 5                                                   
## 6                                                   
## 7                                                   
## 8                                                   
## 9                                                   
## 10                                                  
## 11                                                  
## 12                                                  
## 13                                                  
## 14                                                  
## 15                                                  
## 16                                                  
## 17                                                  
## 18                                                  
## 19                                                  
## 20                                                  
## 21                                                  
## 22                                                  
## 23 6.4739218                                        
## 24 4.3549193 4.1651172                              
## 25 5.4081102 3.4016421 3.0209973                    
## 26 3.5932458 4.7904394 3.0581021 2.8183014          
## 27 3.0487508 3.5788999 2.2159872 2.9165808 2.2334067

# Cut the complete-linkage tree into 4 clusters.
hclust_complete <- hclust(d = dist_matrix, method = "complete")
clusters <- cutree(tree = hclust_complete, k = 4)

# Convert the standardized matrix to a data frame and append the cluster
# label of every row as column CLUSTER.
data_standarisasi <- as.data.frame(x = data_standarisasi)
data_standarisasi$CLUSTER <- clusters
head(x = data_standarisasi)

##   KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI   KONSUMSI       KALORI    PROTEIN
## 1         -0.2830039       -0.08147206 -0.2877741 -0.376017853 -1.0923781
## 2         -1.1106864       -0.06078817 -1.0729320 -0.968598656 -1.4013782
## 3          1.3215599       -0.65143767  1.1187934  0.192671985  0.4006416
## 4          0.8568207       -0.43778005  0.7219946 -0.003091168 -0.4428817
## 5         -0.9373618        1.13180349 -0.6474792 -1.689964365 -1.8309978
## 6         -0.2543380        2.10012193  0.2153523  1.393570815  0.5361310
##        LEMAK        KARBO CLUSTER
## 1  0.2746234 -0.497881168       1
## 2 -0.5472572 -0.659347804       1
## 3 -0.1084205 -0.224141691       2
## 4 -0.5148216  0.001671892       2
## 5 -2.0933192 -0.648967278       3
## 6 -0.3852864  1.942112479       4

# One data frame per cluster label.
data_split <- split(x = data_standarisasi, f = data_standarisasi$CLUSTER)

# Within-cluster distances: for every cluster, drop the CLUSTER label column
# and compute the pairwise distances between its members.
distance_matrices <- lapply(data_split, function(sub_data) {
  fitur <- sub_data[, names(sub_data) != "CLUSTER"]
  dist(fitur)
})

# Show the distance matrix of one cluster, here the first one
print(x = as.matrix(distance_matrices[[1]]))

##           1        2        7       11       12        13       14       15
## 1  0.000000 1.565296 3.241600 2.006389 1.861971 2.5504516 2.489925 2.306434
## 2  1.565296 0.000000 4.450341 2.934618 1.492650 3.0368474 3.380418 2.124486
## 7  3.241600 4.450341 0.000000 2.909568 3.699813 2.4080045 1.732308 4.605266
## 11 2.006389 2.934618 2.909568 0.000000 3.251228 1.9044467 1.790743 1.965289
## 12 1.861971 1.492650 3.699813 3.251228 0.000000 2.6187126 2.899022 3.178227
## 13 2.550452 3.036847 2.408005 1.904447 2.618713 0.0000000 1.168795 2.919587
## 14 2.489925 3.380418 1.732308 1.790743 2.899022 1.1687947 0.000000 3.174875
## 15 2.306434 2.124486 4.605266 1.965289 3.178227 2.9195870 3.174875 0.000000
## 16 2.002150 2.186499 2.964475 1.872837 2.003488 0.9576744 1.693605 2.304942
## 17 2.037785 1.946379 3.902670 1.795018 2.615355 2.0233410 2.631568 1.382971
## 18 2.365351 3.712322 1.359361 1.967449 3.288937 2.2439964 1.355889 3.641528
## 19 3.251653 3.863123 2.018747 3.241264 2.849002 2.0742091 1.578434 4.259373
## 21 1.126905 2.230901 2.307514 2.095299 1.748860 2.0026017 1.738430 2.934005
## 24 2.168453 2.601349 2.414697 2.823123 1.596055 1.7174880 1.969277 3.542870
## 26 2.176468 2.724954 3.882075 3.522016 2.388103 3.6466202 3.599176 3.926164
##           16       17       18       19       21       24       26
## 1  2.0021501 2.037785 2.365351 3.251653 1.126905 2.168453 2.176468
## 2  2.1864987 1.946379 3.712322 3.863123 2.230901 2.601349 2.724954
## 7  2.9644748 3.902670 1.359361 2.018747 2.307514 2.414697 3.882075
## 11 1.8728373 1.795018 1.967449 3.241264 2.095299 2.823123 3.522016
## 12 2.0034884 2.615355 3.288937 2.849002 1.748860 1.596055 2.388103
## 13 0.9576744 2.023341 2.243996 2.074209 2.002602 1.717488 3.646620
## 14 1.6936053 2.631568 1.355889 1.578434 1.738430 1.969277 3.599176
## 15 2.3049422 1.382971 3.641528 4.259373 2.934005 3.542870 3.926164
## 16 0.0000000 1.301156 2.599495 2.419213 1.697224 1.503790 3.402675
## 17 1.3011564 0.000000 3.281284 3.630595 2.366988 2.592261 3.758670
## 18 2.5994950 3.281284 0.000000 2.312346 1.732209 2.488718 3.142855
## 19 2.4192131 3.630595 2.312346 0.000000 2.213937 1.819184 3.974836
## 21 1.6972241 2.366988 1.732209 2.213937 0.000000 1.318012 2.481975
## 24 1.5037904 2.592261 2.488718 1.819184 1.318012 0.000000 3.058102
## 26 3.4026750 3.758670 3.142855 3.974836 2.481975 3.058102 0.000000

# Distance matrix of the second cluster
print(x = as.matrix(distance_matrices[[2]]))

##           3        4        9       20       27
## 3  0.000000 1.176898 2.863381 2.669126 2.199853
## 4  1.176898 0.000000 3.591722 1.875125 1.364659
## 9  2.863381 3.591722 0.000000 3.887358 4.091829
## 20 2.669126 1.875125 3.887358 0.000000 1.999639
## 27 2.199853 1.364659 4.091829 1.999639 0.000000

# Distance matrix of the third cluster
print(x = as.matrix(distance_matrices[[3]]))

##           5       22
## 5  0.000000 1.368926
## 22 1.368926 0.000000

# Distance matrix of the fourth cluster
print(x = as.matrix(distance_matrices[[4]]))

##            6        8       10       23        25
## 6  0.0000000 2.556319 3.269075 3.211707 0.9011036
## 8  2.5563195 0.000000 2.914846 2.552659 2.6193607
## 10 3.2690750 2.914846 0.000000 1.718284 3.1708503
## 23 3.2117070 2.552659 1.718284 0.000000 3.4016421
## 25 0.9011036 2.619361 3.170850 3.401642 0.0000000

# For every cluster: expand the "dist" object into a full square matrix and
# take its largest entry. The diagonal is not excluded, but its entries are
# zero and therefore cannot be the maximum of a non-trivial cluster.
distance_matrix_1 <- as.matrix(distance_matrices[[1]])
max_distance_1 <- max(distance_matrix_1)

distance_matrix_2 <- as.matrix(distance_matrices[[2]])
max_distance_2 <- max(distance_matrix_2)

distance_matrix_3 <- as.matrix(distance_matrices[[3]])
max_distance_3 <- max(distance_matrix_3)

distance_matrix_4 <- as.matrix(distance_matrices[[4]])
max_distance_4 <- max(distance_matrix_4)

# Report the maximum distance of cluster 1
sprintf(fmt = "Maksimum Jarak Matriks Cluster 1 : %.2f", max_distance_1)

## [1] "Maksimum Jarak Matriks Cluster 1 : 4.61"

# Report the maximum distance of cluster 2
sprintf(fmt = "Maksimum Jarak Matriks Cluster 2 : %.2f", max_distance_2)

## [1] "Maksimum Jarak Matriks Cluster 2 : 4.09"

# Report the maximum distance of cluster 3
sprintf(fmt = "Maksimum Jarak Matriks Cluster 3 : %.2f", max_distance_3)

## [1] "Maksimum Jarak Matriks Cluster 3 : 1.37"

# Report the maximum distance of cluster 4
sprintf(fmt = "Maksimum Jarak Matriks Cluster 4 : %.2f", max_distance_4)

## [1] "Maksimum Jarak Matriks Cluster 4 : 3.40"

DENDROGRAM

# Dendrogram of the complete-linkage tree, with leaves labelled by
# regency/city name and rectangles drawn around the 4 clusters.
plot(
  x = hclust_complete,
  labels = data_kp$KOTA_KAB,
  main = "Hierarchical Clustering Dendrogram"
)
rect.hclust(tree = hclust_complete, k = 4)

KUALITAS HASIL CLUSTERING

SILHOUETTE INDEX

library(fpc)

## Warning: package 'fpc' was built under R version 4.3.3

# Feature columns only. At this point `data_standarisasi` may already carry
# the CLUSTER label column appended after cutree(); it has to be excluded,
# otherwise the cluster labels themselves enter the distance computation and
# distort the validation index.
cluster_features <- data_standarisasi[
  , setdiff(colnames(data_standarisasi), "CLUSTER"),
  drop = FALSE
]

# Hierarchical clustering with complete linkage on the features
hc <- hclust(dist(cluster_features), method = "complete")

# Average silhouette width of the partition obtained by cutting a tree.
#
# data: numeric matrix / data frame of the clustered observations (features
#       only); distances between its rows are computed with dist().
# hc:   hclust tree built on the same observations.
# k:    number of clusters to cut the tree into.
#
# Returns a single number, the mean of the per-observation silhouette widths.
calculate_silhouette_index <- function(data, hc, k) {
  clusters <- cutree(hc, k = k)

  # Distances between observations
  dist_matrix <- dist(data)

  # Per-observation silhouette information
  silhouette_result <- cluster::silhouette(clusters, dist_matrix)

  # Mean silhouette width
  mean(silhouette_result[, "sil_width"])
}

# Silhouette index for k = 2, ..., 10
k_values <- 2:10
silhouette_results <- vapply(
  k_values,
  function(k) calculate_silhouette_index(cluster_features, hc, k),
  numeric(1)
)

silhouette_index <- data.frame(
  Clusters = k_values,
  Silhouette_Index = silhouette_results
)

print(silhouette_index)

##   Clusters Silhouette_Index
## 1        2        0.2797809
## 2        3        0.3251431
## 3        4        0.3447322
## 4        5        0.2826450
## 5        6        0.2777626
## 6        7        0.2852300
## 7        8        0.2888611
## 8        9        0.2712916
## 9       10        0.2691137

# Line plot of the silhouette index against the number of clusters.
ggplot(
  data = silhouette_index,
  mapping = aes(x = Clusters, y = Silhouette_Index)
) +
  geom_line(color = "lightblue", linewidth = 1) +
  ggtitle("Line Plot Silhouette Index Berdasarkan Cluster") +
  xlab("Cluster") +
  ylab("Silhouette Index") +
  theme_minimal()

# Bar plot of the silhouette index against the number of clusters; bar
# heights are the index values themselves (stat = "identity").
ggplot(
  data = silhouette_index,
  mapping = aes(x = Clusters, y = Silhouette_Index)
) +
  geom_bar(stat = "identity", fill = "lightblue") +
  ggtitle("Bar Plot Silhouette Index Berdasarkan Cluster") +
  xlab("Cluster") +
  ylab("Silhouette Index") +
  theme_minimal()

Silhouette Index mengukur seberapa baik setiap objek cocok dengan cluster-nya sendiri dibandingkan dengan cluster lainnya; semakin tinggi nilainya, semakin baik hasil clustering. Nilai tertinggi diperoleh pada jumlah cluster k = 4, sehingga k = 4 merupakan clustering terbaik berdasarkan Silhouette Index.

DAVIES-BOULDIN INDEX

library(clusterSim)

## Warning: package 'clusterSim' was built under R version 4.3.3

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

# Complete-linkage hierarchical clustering on the Euclidean distances
# between the rows of data_standarisasi
hc <- hclust(
  d = dist(data_standarisasi),
  method = "complete"
)

#' Davies-Bouldin index of a dendrogram cut into `k` clusters.
#'
#' @param data Observations passed unchanged to `index.DB()`.
#' @param hc An `hclust` tree whose leaves correspond to the rows of `data`.
#' @param k Number of clusters to cut the tree into.
#' @return The `DB` element of the `index.DB()` result.
calculate_db_index <- function(data, hc, k) {
  membership <- cutree(hc, k = k)
  db_result <- index.DB(data, membership)
  db_result$DB
}

# Davies-Bouldin index for every candidate number of clusters (k = 2, ..., 10)
k_values <- 2:10
# vapply() guarantees exactly one numeric value per k
db_results <- vapply(
  k_values,
  function(k) calculate_db_index(data_standarisasi, hc, k),
  numeric(1)
)

# One row per k, paired with its Davies-Bouldin index
db_index <- data.frame(
  Clusters = k_values,
  Davies_Bouldin_Index = db_results
)

print(db_index)

##   Clusters Davies_Bouldin_Index
## 1        2            1.4541759
## 2        3            1.0755999
## 3        4            0.9709037
## 4        5            1.0930343
## 5        6            0.9435998
## 6        7            0.8613765
## 7        8            0.8492397
## 8        9            0.8576456
## 9       10            0.8013965

# Line chart: Davies-Bouldin index against the number of clusters
ggplot(db_index, aes(x = Clusters, y = Davies_Bouldin_Index)) +
  geom_line(linewidth = 1, color = "lightgreen") +
  labs(
    title = "Line Plot Davies-Bouldin Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Davies-Bouldin Index"
  ) +
  theme_minimal()

# Bar chart of the same values (bar height = index value)
ggplot(db_index, aes(x = Clusters, y = Davies_Bouldin_Index)) +
  geom_col(fill = "lightgreen") +
  labs(
    title = "Bar Plot Davies-Bouldin Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Davies-Bouldin Index"
  ) +
  theme_minimal()

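Davies-Bouldin Index mengukur rata-rata rasio jarak intra-cluster terhadap jarak antar-cluster, sehingga nilai yang lebih rendah menunjukkan clustering yang lebih baik. Nilai terendah secara keseluruhan berada pada k = 10 (0,8014), tetapi di antara jumlah cluster yang kecil (k = 2 sampai 5) nilai terendah berada pada k = 4 (0,9709), sehingga k = 4 merupakan clustering yang lebih baik pada rentang tersebut berdasarkan Davies-Bouldin Index.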

DUNN INDEX

library(fpc)

# Complete-linkage hierarchical clustering on the Euclidean distances
# between the rows of data_standarisasi
hc <- hclust(
  d = dist(data_standarisasi),
  method = "complete"
)

#' Dunn index of a dendrogram cut into `k` clusters.
#'
#' @param data Numeric matrix or data frame; distances are computed between
#'   its rows.
#' @param hc An `hclust` tree whose leaves correspond to the rows of `data`.
#' @param k Number of clusters to cut the tree into.
#' @return The `dunn` element of the `cluster.stats()` result.
calculate_dunn_index <- function(data, hc, k) {
  membership <- cutree(hc, k = k)

  # cluster.stats() works on the pairwise distances, not on the raw data
  stats_for_cut <- cluster.stats(dist(data), membership)

  stats_for_cut[["dunn"]]
}

# Dunn index for every candidate number of clusters (k = 2, ..., 10)
k_values <- 2:10
# vapply() guarantees exactly one numeric value per k
dunn_results <- vapply(
  k_values,
  function(k) calculate_dunn_index(data_standarisasi, hc, k),
  numeric(1)
)

# One row per k, paired with its Dunn index
dunn_index <- data.frame(
  Clusters = k_values,
  Dunn_Index = dunn_results
)

print(dunn_index)

##   Clusters Dunn_Index
## 1        2  0.3161736
## 2        3  0.3334068
## 3        4  0.4389394
## 4        5  0.3179890
## 5        6  0.3314065
## 6        7  0.3825083
## 7        8  0.4389163
## 8        9  0.4289227
## 9       10  0.4378942

# Line chart: Dunn index against the number of clusters
ggplot(dunn_index, aes(x = Clusters, y = Dunn_Index)) +
  geom_line(linewidth = 1, color = "lightcoral") +
  labs(
    title = "Line Plot Dunn Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Dunn Index"
  ) +
  theme_minimal()

# Bar chart of the same values (bar height = index value)
ggplot(dunn_index, aes(x = Clusters, y = Dunn_Index)) +
  geom_col(fill = "lightcoral") +
  labs(
    title = "Bar Plot Dunn Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Dunn Index"
  ) +
  theme_minimal()

Dunn Index mengukur rasio jarak terdekat antar-cluster terhadap jarak terjauh intra-cluster (diameter cluster terbesar), sehingga nilai yang lebih tinggi menunjukkan clustering yang lebih baik. Nilai tertinggi (0,4389) berada pada k = 4, sehingga k = 4 merupakan clustering yang lebih baik berdasarkan Dunn Index.

WITHIN-CLUSTER SUM OF SQUARES INDEX

# Complete-linkage hierarchical clustering on the Euclidean distances
# between the rows of data_standarisasi
hc <- hclust(
  d = dist(data_standarisasi),
  method = "complete"
)

#' Within-cluster sum of squares (WCSS) of a dendrogram cut into `k` clusters.
#'
#' For every cluster, sums the squared Euclidean distances of its members to
#' the cluster centroid, then adds up the per-cluster totals.
#'
#' @param data Numeric matrix or data frame with one object per row.
#' @param hc An `hclust` tree whose leaves correspond to the rows of `data`.
#' @param k Number of clusters to cut the tree into.
#' @return A single number: the total within-cluster sum of squares.
calculate_wcss <- function(data, hc, k) {
  clusters <- cutree(hc, k = k)
  values <- as.matrix(data)

  # Deviations from the centroid are used directly. Halving the sum of squared
  # pairwise distances is not WCSS: the pairwise form must be divided by the
  # cluster size (sum_{i<j} d_ij^2 / n), so it only agrees for clusters of 2.
  per_cluster <- vapply(unique(clusters), function(cluster) {
    # drop = FALSE keeps a one-member cluster as a 1-row matrix, so it
    # contributes 0 instead of being treated as several 1-D points.
    members <- values[clusters == cluster, , drop = FALSE]
    centred <- sweep(members, 2, colMeans(members))
    sum(centred^2)
  }, numeric(1))

  sum(per_cluster)
}

# WCSS for every candidate number of clusters (k = 2, ..., 10)
k_values <- 2:10
# vapply() guarantees exactly one numeric value per k
wcss_results <- vapply(
  k_values,
  function(k) calculate_wcss(data_standarisasi, hc, k),
  numeric(1)
)

# One row per k, paired with its WCSS
wcss_index <- data.frame(
  Clusters = k_values,
  WCSS_Index = wcss_results
)

print(wcss_index)

##   Clusters WCSS_Index
## 1        2 1053.79485
## 2        3  691.62102
## 3        4  445.45215
## 4        5  203.00418
## 5        6  176.52717
## 6        7  117.29138
## 7        8   88.47150
## 8        9   58.48111
## 9       10   46.46830

# Line chart: WCSS against the number of clusters
ggplot(wcss_index, aes(x = Clusters, y = WCSS_Index)) +
  geom_line(linewidth = 1, color = "lightpink") +
  labs(
    title = "Line Plot Within-Cluster Sum of Squares Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Within-Cluster Sum of Squares Index"
  ) +
  theme_minimal()

# Bar chart of the same values (bar height = index value)
ggplot(wcss_index, aes(x = Clusters, y = WCSS_Index)) +
  geom_col(fill = "lightpink") +
  labs(
    title = "Bar Plot Within-Cluster Sum of Squares Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Within-Cluster Sum of Squares Index"
  ) +
  theme_minimal()

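Within-Cluster Sum of Squares Index mengukur variansi dalam cluster; nilai yang lebih rendah menunjukkan cluster yang lebih kompak. Namun, nilai ini terus menurun seiring bertambahnya jumlah cluster (pada tabel di atas nilai terendah berada pada k = 10), sehingga nilai terendah saja tidak dapat dijadikan dasar pemilihan k. Yang diperhatikan adalah titik siku (elbow), yaitu jumlah cluster ketika penurunan mulai melandai, sehingga pemilihan k = 4 perlu didukung oleh indeks-indeks lainnya, bukan oleh nilai Within-Cluster Sum of Squares Index yang terendah.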

CALINSKI HARABASZ INDEX

library(fpc)

# Complete-linkage hierarchical clustering on the Euclidean distances
# between the rows of data_standarisasi
hc <- hclust(
  d = dist(data_standarisasi),
  method = "complete"
)

#' Calinski-Harabasz index of a dendrogram cut into `k` clusters.
#'
#' @param data Numeric matrix or data frame; distances are computed between
#'   its rows.
#' @param hc An `hclust` tree whose leaves correspond to the rows of `data`.
#' @param k Number of clusters to cut the tree into.
#' @return The `ch` element of the `cluster.stats()` result.
calculate_ch_index <- function(data, hc, k) {
  membership <- cutree(hc, k = k)

  # cluster.stats() works on the pairwise distances, not on the raw data
  stats_for_cut <- cluster.stats(dist(data), membership)

  stats_for_cut[["ch"]]
}

# Calinski-Harabasz index for every candidate number of clusters
# (k = 2, ..., 10)
k_values <- 2:10
# vapply() guarantees exactly one numeric value per k
ch_results <- vapply(
  k_values,
  function(k) calculate_ch_index(data_standarisasi, hc, k),
  numeric(1)
)

# One row per k, paired with its Calinski-Harabasz index
ch_index <- data.frame(
  Clusters = k_values,
  Calinski_Harabasz_Index = ch_results
)

print(ch_index)

##   Clusters Calinski_Harabasz_Index
## 1        2                11.33669
## 2        3                10.92595
## 3        4                13.21850
## 4        5                13.21784
## 5        6                12.55976
## 6        7                12.87354
## 7        8                13.66953
## 8        9                13.71480
## 9       10                13.47410

# Line chart: Calinski-Harabasz index against the number of clusters
ggplot(ch_index, aes(x = Clusters, y = Calinski_Harabasz_Index)) +
  geom_line(linewidth = 1, color = "lightskyblue") +
  labs(
    title = "Line Plot Calinski Harabasz Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Calinski Harabasz Index"
  ) +
  theme_minimal()

# Bar chart of the same values (bar height = index value)
ggplot(ch_index, aes(x = Clusters, y = Calinski_Harabasz_Index)) +
  geom_col(fill = "lightskyblue") +
  labs(
    title = "Bar Plot Calinski Harabasz Index Berdasarkan Cluster",
    x = "Cluster",
    y = "Calinski Harabasz Index"
  ) +
  theme_minimal()

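Calinski-Harabasz Index mengukur rasio variansi antar-cluster terhadap variansi intra-cluster, sehingga nilai yang lebih tinggi menunjukkan clustering yang lebih baik. Nilai tertinggi secara keseluruhan berada pada k = 9 (13,71), tetapi di antara jumlah cluster yang kecil (k = 2 sampai 5) nilai tertinggi berada pada k = 4 (13,22), sehingga k = 4 merupakan clustering yang lebih baik pada rentang tersebut berdasarkan Calinski-Harabasz Index.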

# Collect the five per-index tables; each one carries a 'Clusters' column
list_of_dataframes <- list(
  silhouette_index,
  db_index,
  dunn_index,
  wcss_index,
  ch_index
)

# Fold the list into one wide table by merging successive pairs on 'Clusters'
cluster_index <- Reduce(
  function(left, right) merge(left, right, by = "Clusters"),
  list_of_dataframes
)

# cluster_index now has one row per k and one column per validity index
print(cluster_index)

##   Clusters Silhouette_Index Davies_Bouldin_Index Dunn_Index WCSS_Index
## 1        2        0.2797809            1.4541759  0.3161736 1053.79485
## 2        3        0.3251431            1.0755999  0.3334068  691.62102
## 3        4        0.3447322            0.9709037  0.4389394  445.45215
## 4        5        0.2826450            1.0930343  0.3179890  203.00418
## 5        6        0.2777626            0.9435998  0.3314065  176.52717
## 6        7        0.2852300            0.8613765  0.3825083  117.29138
## 7        8        0.2888611            0.8492397  0.4389163   88.47150
## 8        9        0.2712916            0.8576456  0.4289227   58.48111
## 9       10        0.2691137            0.8013965  0.4378942   46.46830
##   Calinski_Harabasz_Index
## 1                11.33669
## 2                10.92595
## 3                13.21850
## 4                13.21784
## 5                12.55976
## 6                12.87354
## 7                13.66953
## 8                13.71480
## 9                13.47410

LINE PLOT CLUSTER INDEX

# Reshape the wide index table to long form: one row per (k, index) pair
long_df <- cluster_index %>%
  pivot_longer(
    cols = c(
      Silhouette_Index, Davies_Bouldin_Index, Dunn_Index,
      WCSS_Index, Calinski_Harabasz_Index
    ),
    names_to = "Index",
    values_to = "Value"
  )

# One panel per index, each with its own y scale
ggplot(long_df, aes(x = Clusters, y = Value, group = Index, color = Index)) +
  # `linewidth` replaces `size` for lines, deprecated since ggplot2 3.4.0
  geom_line(linewidth = 1) +
  facet_wrap(~ Index, scales = "free_y", ncol = 5) +
  labs(
    x = "Cluster",
    y = "Index Value",
    title = "Line Plot Cluster Index Berdasarkan Cluster"
  ) +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

BAR PLOT CLUSTER INDEX

library(tidyr)

# Reshape to long form for plotting: every column except 'Clusters' becomes
# an (Index, Value) pair. Note that this overwrites the wide cluster_index.
cluster_index <- pivot_longer(
  cluster_index,
  cols = -Clusters,
  names_to = "Index",
  values_to = "Value"
)

# Bar chart laid out in 5 columns, one panel per index
ggplot(cluster_index, aes(x = Clusters, y = Value, fill = Index)) +
  geom_col(position = "dodge") +
  facet_wrap(~ Index, scales = "free_y", ncol = 5) +
  labs(
    title = "Bar Plot Cluster Index Berdasarkan Cluster",
    x = "Clusters",
    y = "Values"
  ) +
  theme_minimal()

PETA

# Complete-linkage tree on dist_matrix (defined outside this section)
hclust_complete <- hclust(
  d = dist_matrix,
  method = "complete"
)

# Cut the tree into 4 clusters and store the labels next to the source data.
# NOTE(review): assumes the rows of data_kp are in the same order as the
# objects in dist_matrix - confirm.
clusters <- cutree(hclust_complete, k = 4)
data_kp[["CLUSTER"]] <- clusters
data_kp

##            KOTA_KAB KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI   KALORI
## 1           Bandung           5.633103        0.31329813 5.946401 1804.658
## 2     Bandung Barat           4.887103        0.31755956 5.204663 1735.882
## 3            Bekasi           7.079314        0.19587008 7.275185 1870.661
## 4             Bogor           6.660440        0.23988922 6.900329 1847.940
## 5            Ciamis           5.043323        0.56326513 5.606588 1652.160
## 6           Cianjur           5.658940        0.76276445 6.421704 2010.038
## 7           Cirebon           6.272175        0.16882896 6.441003 1967.718
## 8             Garut           6.381393        0.56620620 6.947599 2073.562
## 9         Indramayu           8.629633        0.41801805 9.047651 1814.218
## 10         Karawang           6.956205        0.24104930 7.197254 1938.188
## 11     Kota Bandung           5.734377        0.14097098 5.875348 1772.523
## 12      Kota Banjar           4.825581        0.35930953 5.184891 1822.153
## 13      Kota Bekasi           5.238051        0.12457213 5.362623 1843.376
## 14       Kota Bogor           5.426083        0.11755026 5.543634 1904.372
## 15      Kota Cimahi           4.969171        0.12658823 5.095759 1685.572
## 16     Kota Cirebon           5.148019        0.11725711 5.265276 1794.572
## 17       Kota Depok           5.269708        0.08022675 5.349935 1722.057
## 18    Kota Sukabumi           6.019188        0.25175779 6.270946 1917.551
## 19 Kota Tasikmalaya           5.096843        0.10568572 5.202528 1955.900
## 20         Kuningan           6.696064        0.41114010 7.107204 1726.503
## 21       Majalengka           5.658582        0.23893214 5.897514 1860.445
## 22      Pangandaran           5.885834        0.57146676 6.457301 1624.397
## 23       Purwakarta           7.438918        0.40114391 7.840062 2044.058
## 24           Subang           5.414195        0.19320197 5.607397 1858.440
## 25         Sukabumi           5.339491        0.69194640 6.031437 1992.707
## 26         Sumedang           5.379312        0.73499088 6.114303 1824.296
## 27      Tasikmalaya           6.239748        0.45876590 6.698514 1840.121
##     PROTEIN    LEMAK    KARBO CLUSTER
## 1  51.40107 44.71250 253.2761       1
## 2  50.24038 41.52217 249.6948       1
## 3  57.00926 43.22562 259.3475       2
## 4  53.84075 41.64808 264.3559       2
## 5  48.62661 35.52076 249.9251       3
## 6  57.51819 42.15090 307.3939       4
## 7  60.34936 47.97507 274.1305       1
## 8  58.49617 50.01194 297.3845       4
## 9  58.46919 43.41321 250.2679       2
## 10 59.82331 42.38142 287.9881       4
## 11 55.43122 49.34251 235.2956       1
## 12 53.43185 40.71733 270.3163       1
## 13 59.69793 45.31480 247.0994       1
## 14 58.29864 48.41948 257.7365       1
## 15 51.10099 47.13435 224.3872       1
## 16 56.93196 43.69931 248.3197       1
## 17 54.22694 43.58696 229.4392       1
## 18 56.91189 50.08551 262.9356       1
## 19 59.27829 46.48874 286.7551       1
## 20 49.97542 40.41305 248.3504       2
## 21 54.02775 44.86992 267.5551       1
## 22 48.67894 34.14966 248.2808       3
## 23 61.06077 43.34204 305.1252       4
## 24 57.33747 41.71305 273.9062       1
## 25 59.38748 42.58825 299.8753       4
## 26 52.96619 44.46878 263.9270       1
## 27 54.09913 39.55965 273.5392       2

library(ggplot2)
library(sf)

## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE

library(dplyr)  # Pastikan dplyr sudah dimuat

# Read the 'provjabar' layer from the shapefile directory.
# NOTE(review): the path is absolute and specific to one machine.
jabar <- st_read(
  dsn = "C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/SHP/SHP",
  layer = "provjabar"
)

## Reading layer `provjabar' from data source 
##   `C:\Users\Tasya Anisah Rizqi\Documents\IPB\PEMODELAN KLASIFIKASI\SHP\SHP' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY, XYZ
## Bounding box:  xmin: 106.3703 ymin: -7.82099 xmax: 108.8468 ymax: -5.806538
## z_range:       zmin: 0 zmax: 0
## Geodetic CRS:  WGS 84

# Quick base plot of the layer as read
plot(jabar)

# Strip Z/M coordinates so the geometries are 2D
jabar_2d <- st_zm(jabar, drop = TRUE)

# Validity flag per feature (computed for inspection; not used below)
valid_geometries <- st_is_valid(jabar_2d)

# Repair any invalid geometries
jabarmap_valid <- st_make_valid(jabar_2d)

# Attach the cluster table to the polygons: NAMOBJ on the map side is
# matched with KOTA_KAB on the data side
peta_jabar2 <- left_join(
  jabarmap_valid,
  data_kp,
  by = c("NAMOBJ" = "KOTA_KAB")
)

# Make sure the joined layer is 2D as well
peta_jabar3 <- st_zm(peta_jabar2, drop = TRUE)

# Map of the clusters: one fill colour per cluster, labelled with NAMOBJ
ggplot(peta_jabar3) +
  geom_sf(aes(fill = as.factor(CLUSTER)), color = "black") +
  geom_sf_text(aes(label = NAMOBJ), size = 3.5, color = "black") +
  scale_fill_manual(
    name = "Cluster",
    values = c("#FF7F50", "#4682B4", "#32CD32", "#FFD700")
  ) +
  labs(title = "Visualisasi Hasil Clustering Complete Linkage pada Peta Provinsi Jawa Barat") +
  theme_minimal()

## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data

LINE PLOT (RATA-RATA)

# Mean of every variable in data_standarisasi within each cluster
cluster_means <- aggregate(
  cbind(
    KONSUMSI_PEMBELIAN, KONSUMSI_PRODUKSI, KONSUMSI,
    KALORI, PROTEIN, LEMAK, KARBO
  ) ~ CLUSTER,
  data_standarisasi,
  FUN = mean,
  na.rm = TRUE
)

# Show the per-cluster means
print(cluster_means)

##   CLUSTER KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI   KONSUMSI     KALORI
## 1       1         -0.5437390       -0.50495850 -0.6288910 -0.1464560
## 2       2          1.3012841        0.07112251  1.2570297 -0.2447867
## 3       3         -0.4699810        1.15170780 -0.1972234 -1.8095659
## 4       4          0.5179252        0.98306987  0.7085326  1.4079810
##       PROTEIN      LEMAK      KARBO
## 1 -0.01656162  0.4354270 -0.3607175
## 2 -0.21978928 -0.5138318 -0.2320446
## 3 -1.82403171 -2.2699290 -0.6860344
## 4  0.99908683  0.1155222  1.5886107

# Menyusun data untuk visualisasi
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

# Long form for plotting: one row per (cluster, variable) pair
cluster_means_long <- melt(
  cluster_means,
  id.vars = "CLUSTER",
  variable.name = "Variabel",
  value.name = "Nilai_Rata_Rata"
)

# Membuat visualisasi dengan nama variabel yang sesuai dari data_standarisasi
library(ggplot2)

# Profile plot: one line per cluster across the variables.
# The cluster id is a label, not a quantity, so it is mapped to colour as a
# factor to get a discrete legend instead of a continuous gradient.
ggplot(
  cluster_means_long,
  aes(
    x = Variabel,
    y = Nilai_Rata_Rata,
    group = CLUSTER,
    color = factor(CLUSTER)
  )
) +
  geom_line() +
  geom_point() +
  labs(
    title = "Nilai Rata-Rata Setiap Variabel Berdasarkan Cluster",
    x = "Variabel",
    y = "Nilai Rata-Rata",
    color = "CLUSTER"  # keep the legend title unchanged
  ) +
  theme_minimal() +
  # Rotate the x-axis labels to vertical
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

COMPLETE LINKAGE

KEVIN ALIFVIANSYAH, TASYA ANISAH RIZQI, NABILA TRI AMANDA, DHIYA ULAYYA TSABITAH

2024-08-28

LIBRARY

DATA

MENGUBAH KODE KABUPATEN/KOTA

MENGUBAH NAMA KOLOM

MEMANGGIL DATA DENGAN KOLOM TERPILIH

MENYATUKAN DATA BERDASARKAN KABUPATEN/KOTA

MENYIMPAN DATA

BOX PLOT

LINE PLOT

DENSITY PLOT

STANDARISASI DATA

KORELASI

COMPLETE LINKAGE

JARAK MATRIKS

DENDROGRAM

KUALITAS HASIL CLUSTERING

SILHOUETTE INDEX

DAVIES-BOULDIN INDEX

DUNN INDEX

WITHIN-CLUSTER SUM OF SQUARES INDEX

CALINSKI HARABASZ INDEX

LINE PLOT CLUSTER INDEX

BAR PLOT CLUSTER INDEX

PETA

LINE PLOT (RATA-RATA)