library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cluster)
library(factoextra)
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(dbscan)
##
## Attaching package: 'dbscan'
##
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(e1071)
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:ggplot2':
##
## element
library(fpc)
##
## Attaching package: 'fpc'
##
## The following object is masked from 'package:dbscan':
##
## dbscan
library(flexclust)
##
## Attaching package: 'flexclust'
##
## The following object is masked from 'package:e1071':
##
## bclust
library(meanShiftR)
data <- read.csv("hfi_cc_2018.csv")
# simpan nama negara
negara <- data$country
# ambil numerik & hapus year
data_num <- data %>%
select(where(is.numeric)) %>%
select(-year)
# imputasi missing value
data_num <- data_num %>%
mutate(across(everything(),
~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))
# scaling
df <- scale(data_num)
set.seed(123)
wss <- sapply(1:10, function(k){
kmeans(df, centers = k, nstart = 20)$tot.withinss
})
plot(1:10, wss, type = "b", pch = 19,
xlab = "Number of clusters K",
ylab = "WSS",
main = "Elbow Method")
# SILHOUETTE METHOD
avg_sil <- function(k) {
km_res <- kmeans(df, centers = k, nstart = 25)
ss <- silhouette(km_res$cluster, dist(df))
mean(ss[, 3])
}
k_values <- 2:10
avg_sil_values <- sapply(k_values, avg_sil)
plot(k_values, avg_sil_values, type = "b", pch = 19,
xlab = "K",
ylab = "Silhouette",
main = "Silhouette Analysis")
best_k <- k_values[which.max(avg_sil_values)]
k <- best_k
k
## [1] 2
# K-Means
km_res <- kmeans(df, centers = k, nstart = 25)
# K-Median
kmed_res <- kcca(df, k = k, family = kccaFamily("kmedians"))
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
# DBSCAN
kNNdistplot(df, k = 5)
abline(h = 2, col = "red") # lihat siku
db_res <- dbscan::dbscan(df, eps = 2, minPts = 4)
# Fuzzy C-Means
fcm_result <- cmeans(df, centers = k, m = 2)
fcm_cluster <- apply(fcm_result$membership, 1, which.max)
# Mean Shift
df_small <- df[1:300, ]
ms_res <- meanShift(df_small)
# ambil cluster
ms_cluster <- ms_res$assignment
hitung_metrics <- function(data, cluster_label) {
valid_idx <- cluster_label != 0
data_valid <- data[valid_idx, ]
cluster_valid <- cluster_label[valid_idx]
if (length(unique(cluster_valid)) < 2) {
return(c(Silhouette = NA,
Dunn_Index = NA,
Calinski_Harabasz = NA))
}
sil <- mean(silhouette(cluster_valid, dist(data_valid))[,3])
stats <- cluster.stats(dist(data_valid), cluster_valid)
return(c(Silhouette = sil,
Dunn_Index = stats$dunn,
Calinski_Harabasz = stats$ch))
}
metric_kmeans <- hitung_metrics(df, km_res$cluster)
metric_kmedian <- hitung_metrics(df, clusters(kmed_res))
metric_dbscan <- hitung_metrics(df, db_res$cluster)
metric_fcm <- hitung_metrics(df, fcm_cluster)
metric_meanshift <- hitung_metrics(df_small, ms_cluster)
hasil_metrics <- rbind(
KMeans = metric_kmeans,
KMedian = metric_kmedian,
DBSCAN = metric_dbscan,
FuzzyCMeans = metric_fcm,
MeanShift = metric_meanshift
)
hasil_metrics
## Silhouette Dunn_Index Calinski_Harabasz
## KMeans 0.1630417 0.07016222 357.6936
## KMedian 0.1660960 0.05098379 347.4925
## DBSCAN 0.8591816 2.25288476 255.8268
## FuzzyCMeans 0.1605009 0.01541215 340.3827
## MeanShift 0.2639706 0.98337648 154.4929
normalize <- function(x) {
(x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}
metrics_norm <- as.data.frame(hasil_metrics)
metrics_norm$Silhouette <- normalize(metrics_norm$Silhouette)
metrics_norm$Dunn_Index <- normalize(metrics_norm$Dunn_Index)
metrics_norm$Calinski_Harabasz <- normalize(metrics_norm$Calinski_Harabasz)
# hitung score (fix NA)
metrics_norm$Score <- rowMeans(metrics_norm[,1:3], na.rm = TRUE)
metrics_norm
## Silhouette Dunn_Index Calinski_Harabasz Score
## KMeans 0.003636447 0.02446961 1.0000000 0.3427020
## KMedian 0.008008079 0.01589814 0.9497979 0.3245680
## DBSCAN 1.000000000 1.00000000 0.4986889 0.8328963
## FuzzyCMeans 0.000000000 0.00000000 0.9148090 0.3049363
## MeanShift 0.148092978 0.43261505 0.0000000 0.1935693
# metode terbaik
metrics_model <- metrics_norm[rownames(metrics_norm) != "DBSCAN", ]
best_method <- rownames(metrics_model)[which.max(metrics_model$Score)]
best_method
## [1] "KMeans"
# gunakan K-Means sebagai metode terbaik
data$cluster_kmeans <- km_res$cluster
# jumlah data tiap cluster
table(data$cluster_kmeans)
##
## 1 2
## 961 497
# rata-rata tiap variabel
cluster_summary <- aggregate(data_num,
by = list(Cluster = data$cluster_kmeans),
mean)
cluster_summary
## Cluster pf_rol_procedural pf_rol_civil pf_rol_criminal pf_rol
## 1 1 4.845626 5.045163 4.511896 4.510878
## 2 2 7.027429 6.305458 6.073082 6.854129
## pf_ss_homicide pf_ss_disappearances_disap pf_ss_disappearances_violent
## 1 6.693057 7.588292 9.322524
## 2 8.805025 9.798947 9.900251
## pf_ss_disappearances_organized pf_ss_disappearances_fatalities
## 1 5.792661 9.396583
## 2 8.668202 9.949240
## pf_ss_disappearances_injuries pf_ss_disappearances pf_ss_women_fgm
## 1 9.449741 8.355005 8.925838
## 2 9.886827 9.694394 9.839389
## pf_ss_women_missing pf_ss_women_inheritance_widows
## 1 8.825506 5.265587
## 2 9.705746 8.213446
## pf_ss_women_inheritance_daughters pf_ss_women_inheritance pf_ss_women
## 1 5.219674 5.262046 7.645383
## 2 8.270232 9.291357 9.658766
## pf_ss pf_movement_domestic pf_movement_foreign pf_movement_women
## 1 7.557016 6.790319 6.891994 7.149194
## 2 9.387792 9.465432 9.193336 9.776541
## pf_movement pf_religion_estop_establish pf_religion_estop_operate
## 1 6.940738 7.075930 7.044008
## 2 9.530516 8.390598 8.368256
## pf_religion_estop pf_religion_harassment pf_religion_restrictions pf_religion
## 1 6.703916 8.760943 7.255180 7.606956
## 2 8.888819 8.821350 7.380952 8.393694
## pf_association_association pf_association_assembly
## 1 6.943569 6.484547
## 2 9.530830 9.223129
## pf_association_political_establish pf_association_political_operate
## 1 6.954442 6.003067
## 2 8.544804 7.906089
## pf_association_political pf_association_prof_establish
## 1 5.882065 6.962343
## 2 8.914576 8.317678
## pf_association_prof_operate pf_association_prof
## 1 6.359428 6.094223
## 2 8.118228 8.758773
## pf_association_sport_establish pf_association_sport_operate
## 1 7.958973 6.835770
## 2 9.053835 8.222789
## pf_association_sport pf_association pf_expression_killed pf_expression_jailed
## 1 7.091031 6.533754 8.972414 9.543025
## 2 9.181820 9.146387 9.730306 9.876865
## pf_expression_influence pf_expression_control pf_expression_cable
## 1 4.061072 4.265798 8.653685
## 2 7.403085 7.178068 9.784140
## pf_expression_newspapers pf_expression_internet pf_expression
## 1 7.974229 8.003153 7.201873
## 2 9.774378 9.625704 9.031542
## pf_identity_legal pf_identity_parental_marriage pf_identity_parental_divorce
## 1 6.194092 6.388952 6.909644
## 2 6.526250 8.909361 9.030859
## pf_identity_parental pf_identity_sex_male pf_identity_sex_female
## 1 6.217965 5.345322 6.956122
## 2 9.768323 9.346076 9.828974
## pf_identity_sex pf_identity_divorce pf_identity pf_score pf_rank
## 1 6.156157 7.086609 6.187549 6.457756 101.71666
## 2 9.587525 8.412165 9.551308 8.638953 29.85714
## ef_government_consumption ef_government_transfers ef_government_enterprises
## 1 6.114517 8.519894 5.221054
## 2 4.711492 5.890471 8.054326
## ef_government_tax_income ef_government_tax_payroll ef_government_tax
## 1 7.697030 5.892059 6.849640
## 2 6.855131 4.492616 5.682093
## ef_government ef_legal_judicial ef_legal_courts ef_legal_protection
## 1 6.651189 4.191917 4.035450 4.970015
## 2 6.078397 6.265077 5.097239 6.704441
## ef_legal_military ef_legal_integrity ef_legal_enforcement
## 1 5.278449 5.420906 3.941650
## 2 8.618120 7.639804 5.327186
## ef_legal_restrictions ef_legal_police ef_legal_crime ef_legal_gender ef_legal
## 1 6.985076 4.826071 5.390513 0.8418501 4.517214
## 2 8.056019 6.852256 6.920850 0.9743559 6.754243
## ef_money_growth ef_money_sd ef_money_inflation ef_money_currency ef_money
## 1 8.446402 7.821512 8.570999 5.330547 7.557004
## 2 8.922866 9.261469 9.471212 9.225352 9.220225
## ef_trade_tariffs_revenue ef_trade_tariffs_mean ef_trade_tariffs_sd
## 1 7.583936 7.871953 5.841810
## 2 9.391222 8.927002 6.111363
## ef_trade_tariffs ef_trade_regulatory_nontariff ef_trade_regulatory_compliance
## 1 7.081680 5.361596 5.648017
## 2 8.143196 6.445521 8.574304
## ef_trade_regulatory ef_trade_black ef_trade_movement_foreign
## 1 5.449922 9.649387 5.550153
## 2 7.513402 9.995033 6.664347
## ef_trade_movement_capital ef_trade_movement_visit ef_trade_movement ef_trade
## 1 2.749954 4.410645 4.121357 6.563232
## 2 5.196104 7.174246 6.355402 8.001758
## ef_regulation_credit_ownership ef_regulation_credit_private
## 1 7.091753 7.663644
## 2 8.875652 8.353129
## ef_regulation_credit_interest ef_regulation_credit
## 1 9.082272 7.950375
## 2 9.810865 9.016234
## ef_regulation_labor_minwage ef_regulation_labor_firing
## 1 6.388581 4.765343
## 2 6.557600 4.694122
## ef_regulation_labor_bargain ef_regulation_labor_hours
## 1 6.488190 8.016037
## 2 6.337891 7.857642
## ef_regulation_labor_dismissal ef_regulation_labor_conscription
## 1 5.753854 5.994736
## 2 7.824195 7.637827
## ef_regulation_labor ef_regulation_business_adm
## 1 6.240481 3.929901
## 2 6.824742 4.051716
## ef_regulation_business_bureaucracy ef_regulation_business_start
## 1 4.314605 8.511451
## 2 7.120332 9.514582
## ef_regulation_business_bribes ef_regulation_business_licensing
## 1 4.192509 7.423229
## 2 6.227498 8.230747
## ef_regulation_business_compliance ef_regulation_business ef_regulation
## 1 6.611650 5.894743 6.689298
## 2 7.697693 7.135437 7.658804
## ef_score ef_rank hf_score hf_rank hf_quartile
## 1 6.394015 98.81254 6.425886 102.17548 3.108476
## 2 7.542797 34.74447 8.090875 28.34406 1.295775
# variabel paling membedakan
variansi_cluster <- apply(cluster_summary[,-1], 2, var)
sort(variansi_cluster, decreasing = TRUE)
## hf_rank pf_rank
## 2.725539e+03 2.581895e+03
## ef_rank pf_ss_women_inheritance
## 2.052359e+03 8.117674e+00
## pf_identity_sex_male ef_money_currency
## 8.003018e+00 7.584752e+00
## pf_identity_parental pf_identity_sex
## 6.302522e+00 5.887144e+00
## pf_identity pf_expression_influence
## 5.657437e+00 5.584527e+00
## ef_legal_military pf_ss_women_inheritance_daughters
## 5.576703e+00 4.652952e+00
## pf_association_political pf_ss_women_inheritance_widows
## 4.598063e+00 4.344938e+00
## ef_trade_regulatory_compliance pf_expression_control
## 4.281578e+00 4.240659e+00
## pf_ss_disappearances_organized pf_identity_sex_female
## 4.134366e+00 4.126639e+00
## ef_government_enterprises ef_regulation_business_bureaucracy
## 4.013715e+00 3.936052e+00
## ef_trade_movement_visit pf_association_assembly
## 3.818746e+00 3.749915e+00
## pf_movement_domestic pf_association_prof
## 3.578116e+00 3.549914e+00
## ef_government_transfers pf_movement_women
## 3.456932e+00 3.451474e+00
## pf_association pf_movement
## 3.412926e+00 3.353475e+00
## pf_association_association pf_identity_parental_marriage
## 3.346958e+00 3.176229e+00
## ef_trade_movement_capital pf_rol
## 2.991826e+00 2.745412e+00
## pf_movement_foreign ef_legal
## 2.648089e+00 2.502150e+00
## ef_trade_movement ef_legal_integrity
## 2.495480e+00 2.461755e+00
## pf_ss_disappearances_disap pf_religion_estop
## 2.443498e+00 2.386900e+00
## pf_rol_procedural pf_score
## 2.380131e+00 2.378809e+00
## pf_identity_parental_divorce pf_ss_homicide
## 2.249777e+00 2.230203e+00
## pf_association_sport ef_legal_judicial
## 2.185698e+00 2.148995e+00
## ef_regulation_labor_dismissal ef_trade_regulatory
## 2.143157e+00 2.128974e+00
## ef_regulation_business_bribes ef_legal_police
## 2.070591e+00 2.052713e+00
## pf_ss_women pf_association_political_operate
## 2.026856e+00 1.810745e+00
## pf_ss pf_expression
## 1.675871e+00 1.673843e+00
## hf_quartile ef_trade_tariffs_revenue
## 1.642943e+00 1.633141e+00
## pf_expression_newspapers ef_regulation_credit_ownership
## 1.620268e+00 1.591149e+00
## pf_association_prof_operate ef_legal_protection
## 1.546689e+00 1.504118e+00
## hf_score ef_money
## 1.386094e+00 1.383152e+00
## ef_regulation_labor_conscription pf_expression_internet
## 1.349874e+00 1.316336e+00
## pf_association_political_establish pf_rol_criminal
## 1.264626e+00 1.218651e+00
## ef_legal_crime ef_money_sd
## 1.170964e+00 1.036739e+00
## ef_trade ef_government_consumption
## 1.034679e+00 9.842396e-01
## ef_government_tax_payroll pf_association_sport_operate
## 9.792208e-01 9.619116e-01
## ef_legal_enforcement pf_association_prof_establish
## 9.598545e-01 9.184662e-01
## pf_ss_disappearances pf_identity_divorce
## 8.969818e-01 8.785504e-01
## pf_religion_estop_operate pf_religion_estop_establish
## 8.768169e-01 8.641756e-01
## pf_rol_civil ef_regulation_business
## 7.941722e-01 7.696611e-01
## ef_government_tax ef_score
## 6.815841e-01 6.598494e-01
## pf_expression_cable ef_trade_movement_foreign
## 6.389634e-01 6.207139e-01
## pf_association_sport_establish ef_regulation_business_compliance
## 5.993612e-01 5.897445e-01
## ef_trade_regulatory_nontariff ef_legal_restrictions
## 5.874465e-01 5.734589e-01
## ef_regulation_credit ef_legal_courts
## 5.680276e-01 5.636988e-01
## ef_trade_tariffs ef_trade_tariffs_mean
## 5.634072e-01 5.565642e-01
## ef_regulation_business_start ef_regulation
## 5.031361e-01 4.699710e-01
## pf_ss_women_fgm ef_money_inflation
## 4.172873e-01 4.051916e-01
## pf_ss_women_missing ef_government_tax_income
## 3.874106e-01 3.543969e-01
## ef_regulation_business_licensing pf_religion
## 3.260428e-01 3.094778e-01
## pf_expression_killed ef_regulation_credit_interest
## 2.872000e-01 2.654241e-01
## ef_regulation_credit_private ef_regulation_labor
## 2.376946e-01 1.706810e-01
## pf_ss_disappearances_violent ef_government
## 1.668845e-01 1.640453e-01
## pf_ss_disappearances_fatalities ef_money_growth
## 1.527149e-01 1.135090e-01
## pf_ss_disappearances_injuries ef_trade_black
## 9.552213e-02 5.973555e-02
## pf_expression_jailed pf_identity_legal
## 5.572448e-02 5.516469e-02
## ef_trade_tariffs_sd ef_regulation_labor_minwage
## 3.632918e-02 1.428365e-02
## ef_regulation_labor_hours ef_regulation_labor_bargain
## 1.254448e-02 1.129481e-02
## ef_legal_gender pf_religion_restrictions
## 8.778901e-03 7.909330e-03
## ef_regulation_business_adm ef_regulation_labor_firing
## 7.419422e-03 2.536180e-03
## pf_religion_harassment
## 1.824506e-03
par(mfrow = c(1,1))
boxplot(data_num$hf_score ~ data$cluster_kmeans,
main = "Human Freedom Score")
cluster_count <- table(data$cluster_kmeans)
pie(cluster_count,
main = "Distribusi Cluster")
pca <- prcomp(df)
plot(pca$x[,1:2],
col = data$cluster_kmeans,
pch = 19,
main = "Clustering K-Means (PCA)",
xlab = "PC1",
ylab = "PC2")
par(mfrow = c(1,3))
boxplot(data_num$hf_score ~ data$cluster_kmeans,
main = "HF Score")
boxplot(data_num$pf_score ~ data$cluster_kmeans,
main = "Personal Freedom")
boxplot(data_num$ef_score ~ data$cluster_kmeans,
main = "Economic Freedom")
# rata-rata hf_score per cluster
avg_hf <- tapply(data_num$hf_score, data$cluster_kmeans, mean)
barplot(avg_hf,
main = "Rata-rata Human Freedom per Cluster",
xlab = "Cluster",
ylab = "HF Score")