library(foreign)
## Warning: package 'foreign' was built under R version 4.3.3
library(psych)
## Warning: package 'psych' was built under R version 4.3.2
library(GPArotation)
## Warning: package 'GPArotation' was built under R version 4.3.3
##
## Attaching package: 'GPArotation'
## The following objects are masked from 'package:psych':
##
## equamax, varimin
library(clValid)
## Warning: package 'clValid' was built under R version 4.3.3
## Loading required package: cluster
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(cluster)
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.3.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(tidyverse)
## Warning: package 'purrr' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ ggplot2::%+%() masks psych::%+%()
## ✖ ggplot2::alpha() masks psych::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Warning: package 'car' was built under R version 4.3.2
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
##
## The following object is masked from 'package:psych':
##
## logit
library(kernlab)
##
## Attaching package: 'kernlab'
##
## The following object is masked from 'package:purrr':
##
## cross
##
## The following object is masked from 'package:ggplot2':
##
## alpha
##
## The following object is masked from 'package:psych':
##
## alpha
library(DataExplorer)
library(Rtsne)
## Warning: package 'Rtsne' was built under R version 4.3.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 4.3.3
library(tictoc)
##
## Attaching package: 'tictoc'
##
## The following object is masked from 'package:kernlab':
##
## size
library(dplyr)
data_kp_41 <- read.dbf("C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/SUSENAS/Jawa Barat/2023/dbf/Modul KP (Konsumsi Pengeluaran)/32_ssn_202303_kp_blok41.dbf")
head(data_kp_41)
## URUT R101 R102 R105 R301 KODE KLP COICOP B41K5 B41K6 B41K7 B41K8 B41K9
## 1 500001 32 7 2 4 1 0 <NA> 0.0 92900 0 0 0.0
## 2 500001 32 7 2 4 2 1 01111001 7.0 82900 0 0 7.0
## 3 500001 32 7 2 4 6 1 01115005 0.5 10000 0 0 0.5
## 4 500001 32 7 2 4 8 0 <NA> 0.0 0 0 18000 0.0
## 5 500001 32 7 2 4 9 8 01178001 0.0 0 3 6000 3.0
## 6 500001 32 7 2 4 10 8 01178002 0.0 0 1 6000 1.0
## B41K10 KALORI PROTEIN LEMAK KARBO WERT WEIND PSU SSU WI1
## 1 92900 27019.0 638.25 106.50 5814.50 454.8891 1819.556 12448 123442 12435
## 2 82900 25354.0 593.25 101.50 5428.50 454.8891 1819.556 12448 123442 12435
## 3 10000 1665.0 45.00 5.00 386.00 454.8891 1819.556 12448 123442 12435
## 4 18000 6314.4 52.78 14.69 1489.39 454.8891 1819.556 12448 123442 12435
## 5 6000 3927.0 25.50 7.65 938.40 454.8891 1819.556 12448 123442 12435
## 6 6000 1252.0 11.78 3.26 293.68 454.8891 1819.556 12448 123442 12435
## WI2
## 1 123427
## 2 123427
## 3 123427
## 4 123427
## 5 123427
## 6 123427
data_kp_43 <- read.dbf("C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/SUSENAS/Jawa Barat/2023/dbf/Modul KP (Konsumsi Pengeluaran)/32_ssn_202303_kp_blok43.dbf")
head(data_kp_43)
## URUT R101 R102 R105 R301 FOOD NONFOOD EXPEND KAPITA KALORI_KAP
## 1 500001 32 7 2 4 2660400 2304033.3 4964433 1241108.3 2365.173
## 2 500002 32 72 1 2 1108714 525166.7 1633881 816940.5 2611.639
## 3 500003 32 6 2 3 2413886 1398333.3 3812219 1270739.7 2526.510
## 4 500004 32 72 1 7 7770000 4313333.3 12083333 1726190.5 3655.855
## 5 500005 32 77 1 3 4932557 46219750.0 51152307 17050769.0 2330.988
## 6 500006 32 77 1 2 3272143 11400716.7 14672860 7336429.8 2330.760
## PROTE_KAP LEMAK_KAP KARBO_KAP WERT WEIND PSU SSU WI1 WI2
## 1 67.13024 43.22239 352.9857 454.889101 1819.55641 12448 123442 12435 123427
## 2 69.09189 30.11824 471.9977 172.376821 344.75364 31373 311039 31360 311024
## 3 68.74262 58.51762 360.4562 241.578661 724.73598 12092 119908 12079 119893
## 4 141.50262 119.46043 454.9395 93.673563 655.71494 31135 308689 31122 308674
## 5 78.53335 53.14161 254.8461 122.217556 366.65267 33988 336798 33975 336783
## 6 68.15351 60.91893 317.6763 7.638085 15.27617 34062 337531 34049 337516
# Menggabungkan dua tabel berdasarkan kolom "URUT"
data <- merge(data_kp_41, data_kp_43, by = "URUT")
# Menampilkan hasil penggabungan
head(data)
## URUT R101.x R102.x R105.x R301.x KODE KLP COICOP B41K5 B41K6 B41K7 B41K8
## 1 500001 32 7 2 4 1 0 <NA> 0.0 92900 0 0
## 2 500001 32 7 2 4 2 1 01111001 7.0 82900 0 0
## 3 500001 32 7 2 4 6 1 01115005 0.5 10000 0 0
## 4 500001 32 7 2 4 8 0 <NA> 0.0 0 0 18000
## 5 500001 32 7 2 4 9 8 01178001 0.0 0 3 6000
## 6 500001 32 7 2 4 10 8 01178002 0.0 0 1 6000
## B41K9 B41K10 KALORI PROTEIN LEMAK KARBO WERT.x WEIND.x PSU.x SSU.x
## 1 0.0 92900 27019.0 638.25 106.50 5814.50 454.8891 1819.556 12448 123442
## 2 7.0 82900 25354.0 593.25 101.50 5428.50 454.8891 1819.556 12448 123442
## 3 0.5 10000 1665.0 45.00 5.00 386.00 454.8891 1819.556 12448 123442
## 4 0.0 18000 6314.4 52.78 14.69 1489.39 454.8891 1819.556 12448 123442
## 5 3.0 6000 3927.0 25.50 7.65 938.40 454.8891 1819.556 12448 123442
## 6 1.0 6000 1252.0 11.78 3.26 293.68 454.8891 1819.556 12448 123442
## WI1.x WI2.x R101.y R102.y R105.y R301.y FOOD NONFOOD EXPEND KAPITA
## 1 12435 123427 32 7 2 4 2660400 2304033 4964433 1241108
## 2 12435 123427 32 7 2 4 2660400 2304033 4964433 1241108
## 3 12435 123427 32 7 2 4 2660400 2304033 4964433 1241108
## 4 12435 123427 32 7 2 4 2660400 2304033 4964433 1241108
## 5 12435 123427 32 7 2 4 2660400 2304033 4964433 1241108
## 6 12435 123427 32 7 2 4 2660400 2304033 4964433 1241108
## KALORI_KAP PROTE_KAP LEMAK_KAP KARBO_KAP WERT.y WEIND.y PSU.y SSU.y WI1.y
## 1 2365.173 67.13024 43.22239 352.9857 454.8891 1819.556 12448 123442 12435
## 2 2365.173 67.13024 43.22239 352.9857 454.8891 1819.556 12448 123442 12435
## 3 2365.173 67.13024 43.22239 352.9857 454.8891 1819.556 12448 123442 12435
## 4 2365.173 67.13024 43.22239 352.9857 454.8891 1819.556 12448 123442 12435
## 5 2365.173 67.13024 43.22239 352.9857 454.8891 1819.556 12448 123442 12435
## 6 2365.173 67.13024 43.22239 352.9857 454.8891 1819.556 12448 123442 12435
## WI2.y
## 1 123427
## 2 123427
## 3 123427
## 4 123427
## 5 123427
## 6 123427
data <- data %>%
select(R102.x,B41K5,B41K9,B41K7,KALORI,PROTEIN,LEMAK,KARBO)
head(data)
## R102.x B41K5 B41K9 B41K7 KALORI PROTEIN LEMAK KARBO
## 1 7 0.0 0.0 0 27019.0 638.25 106.50 5814.50
## 2 7 7.0 7.0 0 25354.0 593.25 101.50 5428.50
## 3 7 0.5 0.5 0 1665.0 45.00 5.00 386.00
## 4 7 0.0 0.0 0 6314.4 52.78 14.69 1489.39
## 5 7 0.0 3.0 3 3927.0 25.50 7.65 938.40
## 6 7 0.0 1.0 1 1252.0 11.78 3.26 293.68
#Create a mapping vector for the R102 variable
labels <- c(
"1" = "Bogor",
"2" = "Sukabumi",
"3" = "Cianjur",
"4" = "Bandung",
"5" = "Garut",
"6" = "Tasikmalaya",
"7" = "Ciamis",
"8" = "Kuningan",
"9" = "Cirebon",
"10" = "Majalengka",
"11" = "Sumedang",
"12" = "Indramayu",
"13" = "Subang",
"14" = "Purwakarta",
"15" = "Karawang",
"16" = "Bekasi",
"17" = "Bandung Barat",
"18" = "Pangandaran",
"71" = "Kota Bogor",
"72" = "Kota Sukabumi",
"73" = "Kota Bandung",
"74" = "Kota Cirebon",
"75" = "Kota Bekasi",
"76" = "Kota Depok",
"77" = "Kota Cimahi",
"78" = "Kota Tasikmalaya",
"79" = "Kota Banjar"
)
data$KOTA_KAB <- labels[as.character(data$R102.x)]
# Display the first few rows of the data with the labeled R102
head(data[, c("R102.x", "KOTA_KAB")])
## R102.x KOTA_KAB
## 1 7 Ciamis
## 2 7 Ciamis
## 3 7 Ciamis
## 4 7 Ciamis
## 5 7 Ciamis
## 6 7 Ciamis
data <- data %>%
rename(
"KONSUMSI_PEMBELIAN" = "B41K5",
"KONSUMSI_PRODUKSI" = "B41K7",
"KONSUMSI" = "B41K9"
)
data_select<-data[,c("KOTA_KAB","KONSUMSI_PEMBELIAN","KONSUMSI_PRODUKSI","KONSUMSI","KALORI","PROTEIN","LEMAK","KARBO")]
head(data_select)
## KOTA_KAB KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI PROTEIN LEMAK
## 1 Ciamis 0.0 0 0.0 27019.0 638.25 106.50
## 2 Ciamis 7.0 0 7.0 25354.0 593.25 101.50
## 3 Ciamis 0.5 0 0.5 1665.0 45.00 5.00
## 4 Ciamis 0.0 0 0.0 6314.4 52.78 14.69
## 5 Ciamis 0.0 3 3.0 3927.0 25.50 7.65
## 6 Ciamis 0.0 1 1.0 1252.0 11.78 3.26
## KARBO
## 1 5814.50
## 2 5428.50
## 3 386.00
## 4 1489.39
## 5 938.40
## 6 293.68
plot_intro(data_select,ggtheme = theme_pubr())
data_kp <-aggregate(cbind(KONSUMSI_PEMBELIAN,KONSUMSI_PRODUKSI,KONSUMSI,KALORI,PROTEIN,LEMAK,KARBO)~KOTA_KAB,data_select,
FUN = mean,na.rm =TRUE)
head(data_kp)
## KOTA_KAB KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI PROTEIN
## 1 Bandung 5.633103 0.3132981 5.946401 1804.658 51.40107
## 2 Bandung Barat 4.887103 0.3175596 5.204663 1735.882 50.24038
## 3 Bekasi 7.079314 0.1958701 7.275185 1870.661 57.00926
## 4 Bogor 6.660440 0.2398892 6.900329 1847.940 53.84075
## 5 Ciamis 5.043323 0.5632651 5.606588 1652.160 48.62661
## 6 Cianjur 5.658940 0.7627645 6.421704 2010.038 57.51819
## LEMAK KARBO
## 1 44.71250 253.2761
## 2 41.52217 249.6948
## 3 43.22562 259.3475
## 4 41.64808 264.3559
## 5 35.52076 249.9251
## 6 42.15090 307.3939
write.csv(data_kp, file = "C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/data_kp.csv", row.names = FALSE)
library(writexl)
write_xlsx(data_kp, "C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/data_kp.xlsx")
# Mengatur layout: 1 baris, 3 kolom
par(mfrow = c(1, 3)) # Atur layout plot
# Membuat boxplot untuk KARBO
boxplot(data_kp$KARBO, main = "Boxplot Karbo", ylab = "KARBO", col = "lightblue")
# Membuat boxplot untuk PROTEIN
boxplot(data_kp$PROTEIN, main = "Boxplot Protein", ylab = "PROTEIN", col = "lightgreen")
# Membuat boxplot untuk LEMAK
boxplot(data_kp$LEMAK, main = "Boxplot Lemak", ylab = "LEMAK", col = "lightcoral")
# Mengembalikan pengaturan layout ke default (jika diperlukan)
par(mfrow = c(1, 1))
# Mengatur layout: 1 baris, 2 kolom
par(mfrow = c(1, 2)) # Atur layout plot
# Membuat boxplot untuk KARBO
boxplot(data_kp$KONSUMSI_PEMBELIAN, main = "Boxplot Konsumsi Pembelian", ylab = "KONSUMSI PEMBELIAN", col = "lightblue")
# Membuat boxplot untuk PROTEIN
boxplot(data_kp$KONSUMSI_PRODUKSI, main = "Boxplot Konsumsi Produksi", ylab = "KONSUMSI PRODUKSI", col = "lightgreen")
# Mengembalikan pengaturan layout ke default (jika diperlukan)
par(mfrow = c(1, 1))
ggplot(data_kp, aes(x = KOTA_KAB, y = KALORI, group = 1)) +
geom_line() +
geom_point() +
ggtitle("Line Plot Kalori Berdasarkan Kota/Kabupaten") +
xlab("Kota/Kabupaten") +
ylab("Kalori") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplot(data_kp, aes(x = KOTA_KAB, y = KONSUMSI, group = 1)) +
geom_line() +
geom_point() +
ggtitle("Line Plot Konsumsi Berdasarkan Kota/Kabupaten") +
xlab("Kota/Kabupaten") +
ylab("Konsumsi") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
data_viz<-data_kp %>%
select(-KOTA_KAB)
head(data_viz)
## KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI PROTEIN LEMAK
## 1 5.633103 0.3132981 5.946401 1804.658 51.40107 44.71250
## 2 4.887103 0.3175596 5.204663 1735.882 50.24038 41.52217
## 3 7.079314 0.1958701 7.275185 1870.661 57.00926 43.22562
## 4 6.660440 0.2398892 6.900329 1847.940 53.84075 41.64808
## 5 5.043323 0.5632651 5.606588 1652.160 48.62661 35.52076
## 6 5.658940 0.7627645 6.421704 2010.038 57.51819 42.15090
## KARBO
## 1 253.2761
## 2 249.6948
## 3 259.3475
## 4 264.3559
## 5 249.9251
## 6 307.3939
plot_density(data = data_viz ,binary_as_factor = TRUE,
geom_density_args = list(fill="#03A9F4"),
nrow = 2,
ncol = 1,
ggtheme = theme_pubr(base_size = 9))
data_standarisasi<-scale(data_viz)
head(data_standarisasi)
## KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI PROTEIN
## [1,] -0.2830039 -0.08147206 -0.2877741 -0.376017853 -1.0923781
## [2,] -1.1106864 -0.06078817 -1.0729320 -0.968598656 -1.4013782
## [3,] 1.3215599 -0.65143767 1.1187934 0.192671985 0.4006416
## [4,] 0.8568207 -0.43778005 0.7219946 -0.003091168 -0.4428817
## [5,] -0.9373618 1.13180349 -0.6474792 -1.689964365 -1.8309978
## [6,] -0.2543380 2.10012193 0.2153523 1.393570815 0.5361310
## LEMAK KARBO
## [1,] 0.2746234 -0.497881168
## [2,] -0.5472572 -0.659347804
## [3,] -0.1084205 -0.224141691
## [4,] -0.5148216 0.001671892
## [5,] -2.0933192 -0.648967278
## [6,] -0.3852864 1.942112479
summary(data_standarisasi)
## KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI
## Min. :-1.1789 Min. :-1.2127 Min. :-1.1882 Min. :-1.92917
## 1st Qu.:-0.7038 1st Qu.:-0.8503 1st Qu.:-0.8099 1st Qu.:-0.55791
## Median :-0.2547 Median :-0.3802 Median :-0.1978 Median :-0.04241
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.4866 3rd Qu.: 0.5257 3rd Qu.: 0.6152 3rd Qu.: 0.68559
## Max. : 3.0416 Max. : 2.1001 Max. : 2.9950 Max. : 1.94090
## PROTEIN LEMAK KARBO
## Min. :-1.8310 Min. :-2.44654 Min. :-1.80039
## 1st Qu.:-0.6137 1st Qu.:-0.50645 1st Qu.:-0.68965
## Median : 0.3747 Median :-0.06009 Median :-0.06236
## Mean : 0.0000 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.7929 3rd Qu.: 0.58100 3rd Qu.: 0.43732
## Max. : 1.4792 Max. : 1.65880 Max. : 1.94211
library(GGally)
## Warning: package 'GGally' was built under R version 4.3.3
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
ggcorr(data_standarisasi,method = c("everything","pearson"),geom='tile',label = TRUE)
library(factoextra)
library(cluster)
metode_agg <-"complete"
map(metode_agg, function(i)
fviz_nbclust(
x = data_standarisasi,
FUNcluster = hcut,
method = "silhouette",
hc_method = i,
hc_fun = "hclust",
k.max = 25
)+
ggtitle(str_c("Optimal number of clusters based on HC with ",i," linkage"))
)
## [[1]]
# Hierarchical clustering dengan complete linkage
dist_matrix <- dist(data_standarisasi) # Membuat matriks jarak
hclust_complete <- hclust(dist_matrix, method = "complete")
print(dist_matrix)
## 1 2 3 4 5 6 7
## 2 1.5652960
## 3 2.7662411 4.0057654
## 4 1.9696408 3.0860798 1.1768984
## 5 3.1514446 2.1745536 4.9037974 3.9045532
## 6 4.1455815 4.8156161 4.1352832 3.8240719 5.1856724
## 7 3.2416003 4.4503407 2.2428108 2.7311688 5.9094402 3.7383672
## 8 4.2513586 5.5055915 3.6278715 3.8447724 6.5437983 2.5563195 2.5440238
## 9 5.0760410 6.2865193 2.8633810 3.5917222 6.5228126 5.5713795 4.5228278
## 10 3.6321045 4.7364122 1.6395431 2.1257410 5.5509477 3.2690750 1.9725422
## 11 2.0063889 2.9346182 3.0073168 2.9514025 4.7266225 5.2863941 2.9095677
## 12 1.8619709 1.4926504 3.6771607 2.8228668 2.7671005 3.6247584 3.6998128
## 13 2.5504516 3.0368474 3.0909522 3.0633093 4.7361690 4.6393328 2.4080045
## 14 2.4899250 3.3804177 2.9776376 3.0112351 5.2329078 4.3852915 1.7323080
## 15 2.3064338 2.1244856 4.4082061 3.8995541 3.9478168 6.2031547 4.6052661
## 16 2.0021501 2.1864987 3.1552942 2.8029054 3.9584371 4.7286745 2.9644748
## 17 2.0377854 1.9463789 3.5375754 3.0967167 3.6593008 5.6447608 3.9026701
## 18 2.3653509 3.7123215 2.4290319 2.5904225 5.3356691 3.8979953 1.3593605
## 19 3.2516528 3.8631232 3.6029326 3.4927805 5.5453154 3.8441973 2.0187473
## 20 2.2360350 2.8960578 2.6691260 1.8751254 2.9263655 4.6958365 4.3811781
## 21 1.1269049 2.2309010 2.3683817 1.7567556 3.8418616 3.6075192 2.3075144
## 22 3.5120096 3.0350734 4.5784974 3.6610998 1.3689256 5.3718872 6.0374111
## 23 4.9670463 6.1462885 3.0274991 3.5309035 6.7739568 3.2117070 2.9977237
## 24 2.1684532 2.6013485 2.6704547 2.2128665 3.9555337 3.5298814 2.4146974
## 25 3.9180864 4.5100777 4.0161676 3.7640296 5.0895636 0.9011036 3.3302798
## 26 2.1764677 2.7249542 3.6576348 3.0185536 3.2201335 2.9064362 3.8820746
## 27 2.1889476 2.9147621 2.1998526 1.3646591 3.2119804 2.9027071 3.2762994
## 8 9 10 11 12 13 14
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9 4.9068234
## 10 2.9148461 3.5007744
## 11 4.6177351 5.1937636 4.0368658
## 12 4.6046173 6.1413213 3.8946870 3.2512277
## 13 4.4338772 5.6422228 3.5161286 1.9044467 2.6187126
## 14 3.6831948 5.5587705 3.2916452 1.7907433 2.8990222 1.1687947
## 15 6.1060404 6.5841509 5.4743926 1.9652888 3.1782272 2.9195870 3.1748751
## 16 4.8251807 5.7699315 3.7424401 1.8728373 2.0034884 0.9576744 1.6936053
## 17 5.7038572 5.8902844 4.5482797 1.7950176 2.6153553 2.0233410 2.6315676
## 18 2.7205731 4.6823249 2.8093039 1.9674486 3.2889370 2.2439964 1.3558888
## 19 3.5446256 6.2529065 3.2113669 3.2412641 2.8490024 2.0742091 1.5784338
## 20 5.0800579 3.8873576 3.7980254 3.5303520 3.3201778 4.1560939 4.2950286
## 21 3.5710633 5.0045773 2.8380791 2.0952986 1.7488599 2.0026017 1.7384298
## 22 6.6317732 5.7019096 5.3301219 5.0294748 3.5513824 5.1966894 5.6677285
## 23 2.5526589 3.7284645 1.7182840 5.4330640 5.2008149 4.9866014 4.5887235
## 24 3.9577647 5.3612537 2.6750753 2.8231230 1.5960546 1.7174880 1.9692775
## 25 2.6193607 5.7235926 3.1708503 4.8564454 3.2274748 3.9894164 3.8181445
## 26 3.7121560 5.2554899 3.9855215 3.5220165 2.3881026 3.6466202 3.5991756
## 27 3.7667496 4.0918288 2.4528237 3.6390383 2.3297086 3.4216451 3.5108258
## 15 16 17 18 19 20 21
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16 2.3049422
## 17 1.3829713 1.3011564
## 18 3.6415284 2.5994950 3.2812839
## 19 4.2593727 2.4192131 3.6305951 2.3123464
## 20 3.8067849 3.6437228 3.3499851 3.8349916 4.9899536
## 21 2.9340052 1.6972241 2.3669882 1.7322091 2.2139373 2.8682820
## 22 4.5626589 4.4891166 4.1289739 5.5626499 6.0725527 2.3202614 4.1912019
## 23 6.9641712 5.3089163 6.1499975 3.8483911 4.3300144 4.9599253 4.2169388
## 24 3.5428698 1.5037904 2.5922614 2.4887180 1.8191838 3.4979416 1.3180119
## 25 5.7883464 4.1605795 5.1787147 3.5434109 3.2249942 4.7612787 3.2838710
## 26 3.9261641 3.4026750 3.7586700 3.1428549 3.9748359 2.9391470 2.4819752
## 27 4.3121184 3.1095817 3.5549618 3.1403202 3.6729087 1.9996394 2.0611859
## 22 23 24 25 26
## 2
## 3
## 4
## 5
## 6
## 7
## 8
## 9
## 10
## 11
## 12
## 13
## 14
## 15
## 16
## 17
## 18
## 19
## 20
## 21
## 22
## 23 6.4739218
## 24 4.3549193 4.1651172
## 25 5.4081102 3.4016421 3.0209973
## 26 3.5932458 4.7904394 3.0581021 2.8183014
## 27 3.0487508 3.5788999 2.2159872 2.9165808 2.2334067
hclust_complete <- hclust(dist_matrix, method = "complete")
clusters <- cutree(hclust_complete, k = 4)
data_standarisasi <- as.data.frame(data_standarisasi)
data_standarisasi$CLUSTER <- clusters
head(data_standarisasi)
## KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI PROTEIN
## 1 -0.2830039 -0.08147206 -0.2877741 -0.376017853 -1.0923781
## 2 -1.1106864 -0.06078817 -1.0729320 -0.968598656 -1.4013782
## 3 1.3215599 -0.65143767 1.1187934 0.192671985 0.4006416
## 4 0.8568207 -0.43778005 0.7219946 -0.003091168 -0.4428817
## 5 -0.9373618 1.13180349 -0.6474792 -1.689964365 -1.8309978
## 6 -0.2543380 2.10012193 0.2153523 1.393570815 0.5361310
## LEMAK KARBO CLUSTER
## 1 0.2746234 -0.497881168 1
## 2 -0.5472572 -0.659347804 1
## 3 -0.1084205 -0.224141691 2
## 4 -0.5148216 0.001671892 2
## 5 -2.0933192 -0.648967278 3
## 6 -0.3852864 1.942112479 4
data_split <- split(data_standarisasi, data_standarisasi$CLUSTER)
# Menghitung jarak untuk setiap CLUSTER
distance_matrices <- lapply(data_split, function(sub_data) {
sub_data <- sub_data[, -which(names(sub_data) == "CLUSTER")] # Menghapus kolom CLUSTER
dist(sub_data)
})
# Menampilkan matriks jarak untuk CLUSTER tertentu, misalnya CLUSTER 1
print(as.matrix(distance_matrices[[1]]))
## 1 2 7 11 12 13 14 15
## 1 0.000000 1.565296 3.241600 2.006389 1.861971 2.5504516 2.489925 2.306434
## 2 1.565296 0.000000 4.450341 2.934618 1.492650 3.0368474 3.380418 2.124486
## 7 3.241600 4.450341 0.000000 2.909568 3.699813 2.4080045 1.732308 4.605266
## 11 2.006389 2.934618 2.909568 0.000000 3.251228 1.9044467 1.790743 1.965289
## 12 1.861971 1.492650 3.699813 3.251228 0.000000 2.6187126 2.899022 3.178227
## 13 2.550452 3.036847 2.408005 1.904447 2.618713 0.0000000 1.168795 2.919587
## 14 2.489925 3.380418 1.732308 1.790743 2.899022 1.1687947 0.000000 3.174875
## 15 2.306434 2.124486 4.605266 1.965289 3.178227 2.9195870 3.174875 0.000000
## 16 2.002150 2.186499 2.964475 1.872837 2.003488 0.9576744 1.693605 2.304942
## 17 2.037785 1.946379 3.902670 1.795018 2.615355 2.0233410 2.631568 1.382971
## 18 2.365351 3.712322 1.359361 1.967449 3.288937 2.2439964 1.355889 3.641528
## 19 3.251653 3.863123 2.018747 3.241264 2.849002 2.0742091 1.578434 4.259373
## 21 1.126905 2.230901 2.307514 2.095299 1.748860 2.0026017 1.738430 2.934005
## 24 2.168453 2.601349 2.414697 2.823123 1.596055 1.7174880 1.969277 3.542870
## 26 2.176468 2.724954 3.882075 3.522016 2.388103 3.6466202 3.599176 3.926164
## 16 17 18 19 21 24 26
## 1 2.0021501 2.037785 2.365351 3.251653 1.126905 2.168453 2.176468
## 2 2.1864987 1.946379 3.712322 3.863123 2.230901 2.601349 2.724954
## 7 2.9644748 3.902670 1.359361 2.018747 2.307514 2.414697 3.882075
## 11 1.8728373 1.795018 1.967449 3.241264 2.095299 2.823123 3.522016
## 12 2.0034884 2.615355 3.288937 2.849002 1.748860 1.596055 2.388103
## 13 0.9576744 2.023341 2.243996 2.074209 2.002602 1.717488 3.646620
## 14 1.6936053 2.631568 1.355889 1.578434 1.738430 1.969277 3.599176
## 15 2.3049422 1.382971 3.641528 4.259373 2.934005 3.542870 3.926164
## 16 0.0000000 1.301156 2.599495 2.419213 1.697224 1.503790 3.402675
## 17 1.3011564 0.000000 3.281284 3.630595 2.366988 2.592261 3.758670
## 18 2.5994950 3.281284 0.000000 2.312346 1.732209 2.488718 3.142855
## 19 2.4192131 3.630595 2.312346 0.000000 2.213937 1.819184 3.974836
## 21 1.6972241 2.366988 1.732209 2.213937 0.000000 1.318012 2.481975
## 24 1.5037904 2.592261 2.488718 1.819184 1.318012 0.000000 3.058102
## 26 3.4026750 3.758670 3.142855 3.974836 2.481975 3.058102 0.000000
print(as.matrix(distance_matrices[[2]]))
## 3 4 9 20 27
## 3 0.000000 1.176898 2.863381 2.669126 2.199853
## 4 1.176898 0.000000 3.591722 1.875125 1.364659
## 9 2.863381 3.591722 0.000000 3.887358 4.091829
## 20 2.669126 1.875125 3.887358 0.000000 1.999639
## 27 2.199853 1.364659 4.091829 1.999639 0.000000
print(as.matrix(distance_matrices[[3]]))
## 5 22
## 5 0.000000 1.368926
## 22 1.368926 0.000000
print(as.matrix(distance_matrices[[4]]))
## 6 8 10 23 25
## 6 0.0000000 2.556319 3.269075 3.211707 0.9011036
## 8 2.5563195 0.000000 2.914846 2.552659 2.6193607
## 10 3.2690750 2.914846 0.000000 1.718284 3.1708503
## 23 3.2117070 2.552659 1.718284 0.000000 3.4016421
## 25 0.9011036 2.619361 3.170850 3.401642 0.0000000
distance_matrix_1 <- as.matrix(distance_matrices[[1]])
distance_matrix_2 <- as.matrix(distance_matrices[[2]])
distance_matrix_3 <- as.matrix(distance_matrices[[3]])
distance_matrix_4 <- as.matrix(distance_matrices[[4]])
# Mengambil nilai maksimum dari matriks jarak (kecuali diagonal)
max_distance_1 <- max(distance_matrix_1)
max_distance_2 <- max(distance_matrix_2)
max_distance_3 <- max(distance_matrix_3)
max_distance_4 <- max(distance_matrix_4)
# Menampilkan jarak maksimum
sprintf("Maksimum Jarak Matriks Cluster 1 : %.2f", max_distance_1)
## [1] "Maksimum Jarak Matriks Cluster 1 : 4.61"
sprintf("Maksimum Jarak Matriks Cluster 2 : %.2f", max_distance_2)
## [1] "Maksimum Jarak Matriks Cluster 2 : 4.09"
sprintf("Maksimum Jarak Matriks Cluster 3 : %.2f", max_distance_3)
## [1] "Maksimum Jarak Matriks Cluster 3 : 1.37"
sprintf("Maksimum Jarak Matriks Cluster 4 : %.2f", max_distance_4)
## [1] "Maksimum Jarak Matriks Cluster 4 : 3.40"
# Plot dendrogram
plot(hclust_complete, labels = data_kp$KOTA_KAB, main = "Hierarchical Clustering Dendrogram")
rect.hclust(hclust_complete,4)
library(fpc)
## Warning: package 'fpc' was built under R version 4.3.3
# Hierarchical Clustering dengan complete linkage
hc <- hclust(dist(data_standarisasi), method = "complete")
calculate_silhouette_index <- function(data, hc, k) {
clusters <- cutree(hc, k = k)
# Hitung jarak antar objek
dist_matrix <- dist(data)
# Hitung silhouette
silhouette_result <- silhouette(clusters, dist_matrix)
# Rata-rata nilai silhouette
silhouette_index <- mean(silhouette_result[, 3])
return(silhouette_index)
}
# Hitung Silhouette Index untuk k = 2, 3, dan 4
k_values <- c(2:10)
silhouette_results <- sapply(k_values, function(k) calculate_silhouette_index(data_standarisasi, hc, k))
silhouette_index <- data.frame(
Clusters = k_values,
Silhouette_Index = silhouette_results
)
print(silhouette_index)
## Clusters Silhouette_Index
## 1 2 0.2797809
## 2 3 0.3251431
## 3 4 0.3447322
## 4 5 0.2826450
## 5 6 0.2777626
## 6 7 0.2852300
## 7 8 0.2888611
## 8 9 0.2712916
## 9 10 0.2691137
ggplot(silhouette_index, aes(x = Clusters, y = Silhouette_Index)) +
geom_line(color = "lightblue", linewidth = 1) + # Warna garis biru
labs(x = "Cluster", y = "Silhouette Index", title = "Line Plot Silhouette Index Berdasarkan Cluster") + # Nama variabel dan judul plot kustom
theme_minimal()
ggplot(silhouette_index, aes(x = Clusters, y = Silhouette_Index)) +
geom_bar(stat = "identity", fill = "lightblue") +
labs(x = "Cluster", y = "Silhouette Index", title = "Bar Plot Silhouette Index Berdasarkan Cluster") +
theme_minimal()
Silhouette Index mengukur seberapa baik setiap objek cocok dengan
cluster-nya dibandingkan dengan cluster lainnya. Nilai yang lebih tinggi
berada pada cluster sebanyak 4, sehingga cluster k = 4 merupakan
clustering yang lebih baik berdasarkan Silhouette Index.
library(clusterSim)
## Warning: package 'clusterSim' was built under R version 4.3.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
# Hierarchical Clustering dengan complete linkage
hc <- hclust(dist(data_standarisasi), method = "complete")
calculate_db_index <- function(data, hc, k) {
clusters <- cutree(hc, k = k)
# Hitung Davies-Bouldin Index
db_index <- index.DB(data, clusters)$DB
return(db_index)
}
# Hitung DB Index untuk k = 2, 3, dan 4
k_values <- c(2:10)
db_results <- sapply(k_values, function(k) calculate_db_index(data_standarisasi, hc, k))
db_index <- data.frame(
Clusters = k_values,
Davies_Bouldin_Index = db_results
)
print(db_index)
## Clusters Davies_Bouldin_Index
## 1 2 1.4541759
## 2 3 1.0755999
## 3 4 0.9709037
## 4 5 1.0930343
## 5 6 0.9435998
## 6 7 0.8613765
## 7 8 0.8492397
## 8 9 0.8576456
## 9 10 0.8013965
ggplot(db_index, aes(x = Clusters, y = Davies_Bouldin_Index)) +
geom_line(color = "lightgreen", linewidth = 1) +
labs(x = "Cluster", y = "Davies-Bouldin Index", title = "Line Plot Davies-Bouldin Index Berdasarkan Cluster") + # Nama variabel dan judul plot kustom
theme_minimal()
ggplot(db_index, aes(x = Clusters, y = Davies_Bouldin_Index)) +
geom_bar(stat = "identity", fill = "lightgreen") +
labs(x = "Cluster", y = "Davies-Bouldin Index", title = "Bar Plot Davies-Bouldin Index Berdasarkan Cluster") +
theme_minimal()
Davies-Bouldin Index mengukur rata-rata rasio jarak intra-cluster
terhadap jarak antar-cluster. Nilai yang rendah berada pada cluster
sebanyak 4, sehingga cluster k = 4 merupakan clustering yang lebih baik
berdasarkan Davies-Bouldin Index.
library(fpc)
# Hierarchical Clustering dengan complete linkage
hc <- hclust(dist(data_standarisasi), method = "complete")
calculate_dunn_index <- function(data, hc, k) {
clusters <- cutree(hc, k = k)
# Hitung jarak antar objek
dist_matrix <- dist(data)
# Hitung Dunn Index
dunn_index <- cluster.stats(dist_matrix, clusters)$dunn
return(dunn_index)
}
# Hitung Dunn Index untuk k = 2, 3, dan 4
k_values <- c(2:10)
dunn_results <- sapply(k_values, function(k) calculate_dunn_index(data_standarisasi, hc, k))
dunn_index <- data.frame(
Clusters = k_values,
Dunn_Index = dunn_results
)
print(dunn_index)
## Clusters Dunn_Index
## 1 2 0.3161736
## 2 3 0.3334068
## 3 4 0.4389394
## 4 5 0.3179890
## 5 6 0.3314065
## 6 7 0.3825083
## 7 8 0.4389163
## 8 9 0.4289227
## 9 10 0.4378942
ggplot(dunn_index, aes(x = Clusters, y = Dunn_Index)) +
geom_line(color = "lightcoral", linewidth = 1) +
labs(x = "Cluster", y = "Dunn Index", title = "Line Plot Dunn Index Berdasarkan Cluster") + # Nama variabel dan judul plot kustom
theme_minimal()
ggplot(dunn_index, aes(x = Clusters, y = Dunn_Index)) +
geom_bar(stat = "identity", fill = "lightcoral") +
labs(x = "Cluster", y = "Dunn Index", title = "Bar Plot Dunn Index Berdasarkan Cluster") +
theme_minimal()
Dunn Index mengukur rasio jarak terjauh antar-cluster terhadap jarak
terdekat intra-cluster. Nilai yang lebih tinggi berada pada cluster
sebanyak 4, sehingga cluster k = 4 merupakan clustering yang lebih baik
berdasarkan Dunn Index.
hc <- hclust(dist(data_standarisasi), method = "complete")
calculate_wcss <- function(data, hc, k) {
clusters <- cutree(hc, k = k)
distances <- dist(data)
wcss <- sum(sapply(unique(clusters), function(cluster) {
# Ambil data untuk cluster ini
cluster_data <- data[clusters == cluster, ]
# Hitung jarak antar objek dalam cluster ini
cluster_distances <- dist(cluster_data)
# Jumlahkan kuadrat dari jarak
sum(cluster_distances^2) / 2 # Bagikan dengan 2 untuk menghindari penghitungan ganda
}))
return(wcss)
}
# Hitung WCSS untuk k = 2, 3, dan 4
k_values <- c(2:10)
wcss_results <- sapply(k_values, function(k) calculate_wcss(data_standarisasi, hc, k))
# Tampilkan hasil
wcss_index <- data.frame(
Clusters = k_values,
WCSS_Index = wcss_results
)
print(wcss_index)
## Clusters WCSS_Index
## 1 2 1053.79485
## 2 3 691.62102
## 3 4 445.45215
## 4 5 203.00418
## 5 6 176.52717
## 6 7 117.29138
## 7 8 88.47150
## 8 9 58.48111
## 9 10 46.46830
ggplot(wcss_index, aes(x = Clusters, y = WCSS_Index)) +
geom_line(color = "lightpink", linewidth = 1) +
labs(x = "Cluster", y = "Within-Cluster Sum of Squares Index", title = "Line Plot Within-Cluster Sum of Squares Index Berdasarkan Cluster") + # Nama variabel dan judul plot kustom
theme_minimal()
ggplot(wcss_index, aes(x = Clusters, y = WCSS_Index)) +
geom_bar(stat = "identity", fill = "lightpink") +
labs(x = "Cluster", y = "Within-Cluster Sum of Squares Index", title = "Bar Plot Within-Cluster Sum of Squares Index Berdasarkan Cluster") +
theme_minimal()
Within-Cluster Sum of Squares Index mengukur variansi dalam cluster.
Nilai yang rendah berada pada cluster sebanyak 4, sehingga cluster k = 4
merupakan clustering yang lebih baik berdasarkan Within-Cluster Sum of
Squares Index.
library(fpc)
# Hierarchical Clustering dengan complete linkage
hc <- hclust(dist(data_standarisasi), method = "complete")
calculate_ch_index <- function(data, hc, k) {
clusters <- cutree(hc, k = k)
# Hitung jarak antar objek
dist_matrix <- dist(data)
# Hitung CH Index
ch_index <- cluster.stats(dist_matrix, clusters)$ch
return(ch_index)
}
# Hitung CH Index untuk k = 2, 3, dan 4
k_values <- c(2:10)
ch_results <- sapply(k_values, function(k) calculate_ch_index(data_standarisasi, hc, k))
ch_index <- data.frame(
Clusters = k_values,
Calinski_Harabasz_Index = ch_results
)
print(ch_index)
## Clusters Calinski_Harabasz_Index
## 1 2 11.33669
## 2 3 10.92595
## 3 4 13.21850
## 4 5 13.21784
## 5 6 12.55976
## 6 7 12.87354
## 7 8 13.66953
## 8 9 13.71480
## 9 10 13.47410
ggplot(ch_index, aes(x = Clusters, y = Calinski_Harabasz_Index)) +
geom_line(color = "lightskyblue", linewidth = 1) +
labs(x = "Cluster", y = "Calinski Harabasz Index", title = "Line Plot Calinski Harabasz Index Berdasarkan Cluster") + # Nama variabel dan judul plot kustom
theme_minimal()
ggplot(ch_index, aes(x = Clusters, y = Calinski_Harabasz_Index)) +
geom_bar(stat = "identity", fill = "lightskyblue") +
labs(x = "Cluster", y = "Calinski Harabasz Index", title = "Bar Plot Calinski Harabasz Index Berdasarkan Cluster") +
theme_minimal()
Calinski-Harabasz Index mengukur mengukur rasio variansi antar-cluster
terhadap variansi intra-cluster. Nilai yang lebih tinggi berada pada
cluster sebanyak 4, sehingga cluster k = 4 merupakan clustering yang
lebih baik berdasarkan Calinski-Harabasz Index.
# Buat daftar dari semua dataframe yang ingin digabungkan
list_of_dataframes <- list(silhouette_index, db_index, dunn_index, wcss_index, ch_index)
# Gabungkan semua dataframe dalam daftar berdasarkan kolom 'Clusters'
cluster_index <- Reduce(function(x, y) merge(x, y, by = "Clusters"), list_of_dataframes)
# Hasil akhir adalah merged_df yang berisi semua dataframe yang digabungkan
print(cluster_index)
## Clusters Silhouette_Index Davies_Bouldin_Index Dunn_Index WCSS_Index
## 1 2 0.2797809 1.4541759 0.3161736 1053.79485
## 2 3 0.3251431 1.0755999 0.3334068 691.62102
## 3 4 0.3447322 0.9709037 0.4389394 445.45215
## 4 5 0.2826450 1.0930343 0.3179890 203.00418
## 5 6 0.2777626 0.9435998 0.3314065 176.52717
## 6 7 0.2852300 0.8613765 0.3825083 117.29138
## 7 8 0.2888611 0.8492397 0.4389163 88.47150
## 8 9 0.2712916 0.8576456 0.4289227 58.48111
## 9 10 0.2691137 0.8013965 0.4378942 46.46830
## Calinski_Harabasz_Index
## 1 11.33669
## 2 10.92595
## 3 13.21850
## 4 13.21784
## 5 12.55976
## 6 12.87354
## 7 13.66953
## 8 13.71480
## 9 13.47410
long_df <- cluster_index %>%
pivot_longer(cols = c(Silhouette_Index, Davies_Bouldin_Index, Dunn_Index, WCSS_Index, Calinski_Harabasz_Index),
names_to = "Index", values_to = "Value")
ggplot(long_df, aes(x = Clusters, y = Value, group = Index, color = Index)) +
geom_line(size = 1) +
facet_wrap(~ Index, scales = "free_y", ncol = 5) +
labs(x = "Cluster", y = "Index Value", title = "Line Plot Cluster Index Berdasarkan Cluster") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
library(tidyr)
# Gabungkan data dalam format long untuk memudahkan plotting
cluster_index <- cluster_index %>%
pivot_longer(cols = -Clusters, names_to = "Index", values_to = "Value")
# Membuat diagram batang dengan layout 5 kolom
ggplot(cluster_index, aes(x = Clusters, y = Value, fill = Index)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Index, scales = "free_y", ncol = 5) +
labs(x = "Clusters", y = "Values", title = "Bar Plot Cluster Index Berdasarkan Cluster") +
theme_minimal()
hclust_complete <- hclust(dist_matrix, method = "complete")
clusters <- cutree(hclust_complete, k = 4)
data_kp$CLUSTER <- clusters
data_kp
## KOTA_KAB KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI
## 1 Bandung 5.633103 0.31329813 5.946401 1804.658
## 2 Bandung Barat 4.887103 0.31755956 5.204663 1735.882
## 3 Bekasi 7.079314 0.19587008 7.275185 1870.661
## 4 Bogor 6.660440 0.23988922 6.900329 1847.940
## 5 Ciamis 5.043323 0.56326513 5.606588 1652.160
## 6 Cianjur 5.658940 0.76276445 6.421704 2010.038
## 7 Cirebon 6.272175 0.16882896 6.441003 1967.718
## 8 Garut 6.381393 0.56620620 6.947599 2073.562
## 9 Indramayu 8.629633 0.41801805 9.047651 1814.218
## 10 Karawang 6.956205 0.24104930 7.197254 1938.188
## 11 Kota Bandung 5.734377 0.14097098 5.875348 1772.523
## 12 Kota Banjar 4.825581 0.35930953 5.184891 1822.153
## 13 Kota Bekasi 5.238051 0.12457213 5.362623 1843.376
## 14 Kota Bogor 5.426083 0.11755026 5.543634 1904.372
## 15 Kota Cimahi 4.969171 0.12658823 5.095759 1685.572
## 16 Kota Cirebon 5.148019 0.11725711 5.265276 1794.572
## 17 Kota Depok 5.269708 0.08022675 5.349935 1722.057
## 18 Kota Sukabumi 6.019188 0.25175779 6.270946 1917.551
## 19 Kota Tasikmalaya 5.096843 0.10568572 5.202528 1955.900
## 20 Kuningan 6.696064 0.41114010 7.107204 1726.503
## 21 Majalengka 5.658582 0.23893214 5.897514 1860.445
## 22 Pangandaran 5.885834 0.57146676 6.457301 1624.397
## 23 Purwakarta 7.438918 0.40114391 7.840062 2044.058
## 24 Subang 5.414195 0.19320197 5.607397 1858.440
## 25 Sukabumi 5.339491 0.69194640 6.031437 1992.707
## 26 Sumedang 5.379312 0.73499088 6.114303 1824.296
## 27 Tasikmalaya 6.239748 0.45876590 6.698514 1840.121
## PROTEIN LEMAK KARBO CLUSTER
## 1 51.40107 44.71250 253.2761 1
## 2 50.24038 41.52217 249.6948 1
## 3 57.00926 43.22562 259.3475 2
## 4 53.84075 41.64808 264.3559 2
## 5 48.62661 35.52076 249.9251 3
## 6 57.51819 42.15090 307.3939 4
## 7 60.34936 47.97507 274.1305 1
## 8 58.49617 50.01194 297.3845 4
## 9 58.46919 43.41321 250.2679 2
## 10 59.82331 42.38142 287.9881 4
## 11 55.43122 49.34251 235.2956 1
## 12 53.43185 40.71733 270.3163 1
## 13 59.69793 45.31480 247.0994 1
## 14 58.29864 48.41948 257.7365 1
## 15 51.10099 47.13435 224.3872 1
## 16 56.93196 43.69931 248.3197 1
## 17 54.22694 43.58696 229.4392 1
## 18 56.91189 50.08551 262.9356 1
## 19 59.27829 46.48874 286.7551 1
## 20 49.97542 40.41305 248.3504 2
## 21 54.02775 44.86992 267.5551 1
## 22 48.67894 34.14966 248.2808 3
## 23 61.06077 43.34204 305.1252 4
## 24 57.33747 41.71305 273.9062 1
## 25 59.38748 42.58825 299.8753 4
## 26 52.96619 44.46878 263.9270 1
## 27 54.09913 39.55965 273.5392 2
library(ggplot2)
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(dplyr) # Pastikan dplyr sudah dimuat
jabar=st_read(dsn="C:/Users/Tasya Anisah Rizqi/Documents/IPB/PEMODELAN KLASIFIKASI/SHP/SHP", layer="provjabar")
## Reading layer `provjabar' from data source
## `C:\Users\Tasya Anisah Rizqi\Documents\IPB\PEMODELAN KLASIFIKASI\SHP\SHP'
## using driver `ESRI Shapefile'
## Simple feature collection with 27 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY, XYZ
## Bounding box: xmin: 106.3703 ymin: -7.82099 xmax: 108.8468 ymax: -5.806538
## z_range: zmin: 0 zmax: 0
## Geodetic CRS: WGS 84
plot(jabar)
# Memastikan bahwa peta adalah 2D
jabar_2d <- st_zm(jabar, drop = TRUE)
# Memeriksa validitas geometri
valid_geometries <- st_is_valid(jabar_2d)
# Memperbaiki geometri yang tidak valid
jabarmap_valid <- st_make_valid(jabar_2d)
# Menggabungkan data hasil cluster dan data geospasial
peta_jabar2 <- jabarmap_valid %>%
left_join(data_kp, by = c("NAMOBJ" = "KOTA_KAB"))
# Memastikan bahwa peta adalah 2D
peta_jabar3 <- st_zm(peta_jabar2, drop = TRUE)
# Visualisasi hasil cluster pada peta Jawa Barat dengan gradasi warna berbeda
ggplot(peta_jabar3) +
geom_sf(aes(fill = as.factor(CLUSTER)), color = "black") +
scale_fill_manual(values = c("#FF7F50", "#4682B4", "#32CD32", "#FFD700"), name = "Cluster") +
geom_sf_text(aes(label = NAMOBJ), size = 3.5, color = "black") +
labs(title = "Visualisasi Hasil Clustering Complete Linkage pada Peta Provinsi Jawa Barat") +
theme_minimal()
## Warning in st_point_on_surface.sfc(sf::st_zm(x)): st_point_on_surface may not
## give correct results for longitude/latitude data
# Menentukan rata-rata setiap variabel berdasarkan cluster
cluster_means <- aggregate(cbind(KONSUMSI_PEMBELIAN,KONSUMSI_PRODUKSI,KONSUMSI,KALORI,PROTEIN,LEMAK,KARBO)~CLUSTER,data_standarisasi, FUN = mean,na.rm =TRUE)
# Menampilkan hasil rata-rata untuk setiap cluster
print(cluster_means)
## CLUSTER KONSUMSI_PEMBELIAN KONSUMSI_PRODUKSI KONSUMSI KALORI
## 1 1 -0.5437390 -0.50495850 -0.6288910 -0.1464560
## 2 2 1.3012841 0.07112251 1.2570297 -0.2447867
## 3 3 -0.4699810 1.15170780 -0.1972234 -1.8095659
## 4 4 0.5179252 0.98306987 0.7085326 1.4079810
## PROTEIN LEMAK KARBO
## 1 -0.01656162 0.4354270 -0.3607175
## 2 -0.21978928 -0.5138318 -0.2320446
## 3 -1.82403171 -2.2699290 -0.6860344
## 4 0.99908683 0.1155222 1.5886107
# Menyusun data untuk visualisasi
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
cluster_means_long <- melt(cluster_means, id.vars = "CLUSTER", variable.name = "Variabel", value.name = "Nilai_Rata_Rata")
# Membuat visualisasi dengan nama variabel yang sesuai dari data_standarisasi
library(ggplot2)
ggplot(cluster_means_long, aes(x = Variabel, y = Nilai_Rata_Rata, group = CLUSTER, color = CLUSTER)) +
geom_line() +
geom_point() +
labs(title = "Nilai Rata-Rata Setiap Variabel Berdasarkan Cluster",
x = "Variabel",
y = "Nilai Rata-Rata") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) # Memiringkan teks pada sumbu x