## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'FactoMineR' was built under R version 4.4.3
## Warning: package 'factoextra' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Warning: package 'ggrepel' was built under R version 4.4.3
## Warning: package 'ggforce' was built under R version 4.4.2
## Warning: package 'pheatmap' was built under R version 4.4.3
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
df <- read.csv("C:/Users/Ainul Hayati/Documents/kuliah/Departemen/Semester 5/Teknik Peubah Ganda/0_df_long.csv", sep=';')
df$NamaProv[is.na(df$NamaProv)] <- "Kalimantan Utara"
df$NamaProv[df$NamaProv == "DI Yogyakarta"] <- "Daerah Istimewa Yogyakarta"
df$NamaProv[df$NamaProv == "DKI Jakarta"] <- "Dki Jakarta"
str(df)## 'data.frame': 340620 obs. of 9 variables:
## $ KabKot : chr "Kab. Aceh Barat" "Kab. Aceh Barat" "Kab. Aceh Barat" "Kab. Aceh Barat" ...
## $ Tahun : int 2022 2022 2022 2022 2022 2022 2022 2022 2022 2022 ...
## $ Bulan : chr "Januari" "Januari" "Januari" "Januari" ...
## $ Produk : chr "Beras Premium" "Beras Medium" "Bawang Merah" "Bawang Putih (Bonggol)" ...
## $ Harga : num 11429 9979 31000 28636 19409 ...
## $ Kategori: chr "Beras" "Beras" "Bawang" "Bawang" ...
## $ KodeBPS : num 11.1 11.1 11.1 11.1 11.1 ...
## $ KodeProv: int 11 11 11 11 11 11 11 11 11 11 ...
## $ NamaProv: chr "Aceh" "Aceh" "Aceh" "Aceh" ...
Cabai Merah Besar,
Jagung Tk. Peternak, dan Minyakita dikeluarkan
karena terdapat missing value## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 4000 14750 26458 35451 38329 200000 17111
df_prov <- df_2023 %>%
group_by(NamaProv, Produk) %>%
summarise(HargaRata = mean(Harga, na.rm = TRUE), .groups = "drop")
df_wide <- df_prov %>%
pivot_wider(names_from = Produk, values_from = HargaRata)
df_wide_clean <- df_wide %>%
select(-`Cabai Merah Besar`,
-`Jagung Tk. Peternak`,
-Minyakita)
str(df_wide_clean)## tibble [34 × 13] (S3: tbl_df/tbl/data.frame)
## $ NamaProv : chr [1:34] "Aceh" "Bali" "Banten" "Bengkulu" ...
## $ Bawang Merah : num [1:34] 31468 26137 33347 32120 30167 ...
## $ Bawang Putih (Bonggol): num [1:34] 34120 30857 33235 33255 30709 ...
## $ Beras Medium : num [1:34] 12110 12533 11619 11954 11963 ...
## $ Beras Premium : num [1:34] 13315 13760 13048 13451 13034 ...
## $ Cabai Merah Keriting : num [1:34] 38510 37637 44197 38640 36152 ...
## $ Cabai Rawit Merah : num [1:34] 37818 47496 54980 46950 45732 ...
## $ Daging Ayam Ras : num [1:34] 30711 39085 37009 33862 34190 ...
## $ Daging Sapi Murni : num [1:34] 155161 116042 137769 134991 135672 ...
## $ Gula Konsumsi : num [1:34] 15334 14370 14630 14834 14421 ...
## $ Minyak Goreng Curah : num [1:34] 13887 15391 13857 15259 13976 ...
## $ Minyak Goreng Kemasan : num [1:34] 18384 17741 15376 16182 15782 ...
## $ Telur Ayam Ras : num [1:34] 25399 27396 28143 26655 27502 ...
Setiap komoditas dikonversi menjadi kategori: - “Tinggi” jika harga di provinsi tersebut ≥ median nasional - “Rendah” jika harga < median
df_cat <- df_wide_clean %>%
mutate(across(-NamaProv,
~ ifelse(. >= median(., na.rm = TRUE),
"Tinggi", "Rendah")))
df_cat[, -1] <- lapply(df_cat[, -1], factor)set.seed(123)
k <- 3
km_res <- kmeans(mca_coord, centers = k, nstart = 25)
df_cat$Cluster <- as.factor(km_res$cluster)mat <- df_cat[, -c(1, ncol(df_cat))] # hapus NamaProv & Cluster
# Konversi kategori ke nilai 0/1
mat_num <- ifelse(mat == "Tinggi", 1, 0)
# Tambahkan nama provinsi sebagai rownames
rownames(mat_num) <- df_cat$NamaProvpheatmap(mat_num,
cluster_rows = TRUE,
cluster_cols = TRUE,
color = colorRampPalette(c("blue", "white", "red"))(50),
legend = TRUE,
main = "Heatmap Tinggi–Rendah Harga Komoditas (0 = rendah, 1 = tinggi)",
fontsize_row = 8,
fontsize_col = 9)Cluster 1: provinsi dengan harga relatif rendah Cluster 2: harga tinggi untuk bawang/cabai Cluster 3: provinsi dengan harga sangat tinggi
fviz_cluster(km_res, data = mca_coord,
geom = "point",
ellipse.type = "convex",
palette = "jco",
main = "Cluster Provinsi Berdasarkan Harga Multi-Komoditas")library(ggplot2)
library(ggrepel)
# Gabungkan data kategori + koordinat MCA
plot_data <- cbind(df_cat, mca_coord)
ggplot(plot_data,
aes(x = `Dim 2`, y = `Dim 1`,
color = Cluster, shape = Cluster)) +
geom_point(size = 4, alpha = 0.9) +
geom_text_repel(aes(label = NamaProv),
size = 3.5, max.overlaps = 20) +
stat_ellipse(type = "convex",
linetype = 2, linewidth = 0.8) +
scale_color_manual(values = c("#2E86DE", "#F1C40F", "#7F8C8D")) +
labs(
title = "Cluster Provinsi Berdasarkan Harga Multi-Komoditas",
subtitle = "Metode MCA + K-Means (k = 3)",
x = paste0("Dim1 (", round(mca_res$eig[1, 2], 1), "%)"),
y = paste0("Dim2 (", round(mca_res$eig[2, 2], 1), "%)")
) +
theme_minimal(base_size = 14) +
theme(
plot.title = element_text(face = "bold"),
legend.position = "right"
)## Unrecognized ellipse type
## Unrecognized ellipse type
## Unrecognized ellipse type
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_path()`).
shp <- st_read("C:/Users/Ainul Hayati/Documents/kuliah/Departemen/Semester 5/Regresi Spasial/data studi kasus/idn_adm_bps_20200401_shp/idn_admbnda_adm1_bps_20200401.shp")## Reading layer `idn_admbnda_adm1_bps_20200401' from data source
## `C:\Users\Ainul Hayati\Documents\kuliah\Departemen\Semester 5\Regresi Spasial\data studi kasus\idn_adm_bps_20200401_shp\idn_admbnda_adm1_bps_20200401.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 34 features and 12 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 95.01079 ymin: -11.00762 xmax: 141.0194 ymax: 6.07693
## Geodetic CRS: WGS 84
# Simpan geometry
geo <- st_geometry(shp)
# Drop geometry → menjadi data.frame biasa
shp_nogeo <- shp %>% st_drop_geometry()
# Join berdasar nama provinsi
merged <- shp_nogeo %>%
left_join(df_cat, by = c("ADM1_EN" = "NamaProv"))
# Gabungkan kembali geometry
merged_sf <- st_as_sf(merged, geometry = geo)ggplot(merged_sf) +
geom_sf(aes(fill = Cluster), color = "white") +
scale_fill_manual(values = c("#2E86DE", "#F1C40F", "#7F8C8D")) +
theme_minimal() +
labs(title = "Peta Cluster Provinsi")