LOAD LIBRARY

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cluster)
library(factoextra)
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(dbscan)
## 
## Attaching package: 'dbscan'
## 
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(e1071)
## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:ggplot2':
## 
##     element
library(fpc)
## 
## Attaching package: 'fpc'
## 
## The following object is masked from 'package:dbscan':
## 
##     dbscan
library(flexclust)
## 
## Attaching package: 'flexclust'
## 
## The following object is masked from 'package:e1071':
## 
##     bclust
library(meanShiftR)

LOAD DATA

data <- read.csv("hfi_cc_2018.csv")
# simpan nama negara
negara <- data$country

PREPROCESSING

# ambil numerik & hapus year
data_num <- data %>%
  select(where(is.numeric)) %>%
  select(-year)

# imputasi missing value
data_num <- data_num %>%
  mutate(across(everything(),
                ~ ifelse(is.na(.), mean(., na.rm = TRUE), .)))

# scaling
df <- scale(data_num)
set.seed(123)

ELBOW METHOD

wss <- sapply(1:10, function(k){
  kmeans(df, centers = k, nstart = 20)$tot.withinss
})

plot(1:10, wss, type = "b", pch = 19,
     xlab = "Number of clusters K",
     ylab = "WSS",
     main = "Elbow Method")

# SILHOUETTE METHOD

avg_sil <- function(k) {
  km_res <- kmeans(df, centers = k, nstart = 25)
  ss <- silhouette(km_res$cluster, dist(df))
  mean(ss[, 3])
}

k_values <- 2:10
avg_sil_values <- sapply(k_values, avg_sil)

plot(k_values, avg_sil_values, type = "b", pch = 19,
     xlab = "K",
     ylab = "Silhouette",
     main = "Silhouette Analysis")

best_k <- k_values[which.max(avg_sil_values)]
k <- best_k
k
## [1] 2

CLUSTERING

# K-Means
km_res <- kmeans(df, centers = k, nstart = 25)

# K-Median
kmed_res <- kcca(df, k = k, family = kccaFamily("kmedians"))
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
## Found more than one class "kcca" in cache; using the first, from namespace 'kernlab'
## Also defined by 'flexclust'
# DBSCAN
kNNdistplot(df, k = 5)
abline(h = 2, col = "red")  # lihat siku

db_res <- dbscan::dbscan(df, eps = 2, minPts = 4)

# Fuzzy C-Means
fcm_result <- cmeans(df, centers = k, m = 2)
fcm_cluster <- apply(fcm_result$membership, 1, which.max)

# Mean Shift
df_small <- df[1:300, ]
ms_res <- meanShift(df_small)

# ambil cluster
ms_cluster <- ms_res$assignment

FUNGSI METRIC

hitung_metrics <- function(data, cluster_label) {
  
  valid_idx <- cluster_label != 0
  
  data_valid <- data[valid_idx, ]
  cluster_valid <- cluster_label[valid_idx]
  
  if (length(unique(cluster_valid)) < 2) {
    return(c(Silhouette = NA,
             Dunn_Index = NA,
             Calinski_Harabasz = NA))
  }
  
  sil <- mean(silhouette(cluster_valid, dist(data_valid))[,3])
  stats <- cluster.stats(dist(data_valid), cluster_valid)
  
  return(c(Silhouette = sil,
           Dunn_Index = stats$dunn,
           Calinski_Harabasz = stats$ch))
}

HITUNG METRICS

metric_kmeans <- hitung_metrics(df, km_res$cluster)
metric_kmedian <- hitung_metrics(df, clusters(kmed_res))
metric_dbscan <- hitung_metrics(df, db_res$cluster)
metric_fcm <- hitung_metrics(df, fcm_cluster)
metric_meanshift <- hitung_metrics(df_small, ms_cluster)

TABEL METRICS

hasil_metrics <- rbind(
  KMeans = metric_kmeans,
  KMedian = metric_kmedian,
  DBSCAN = metric_dbscan,
  FuzzyCMeans = metric_fcm,
  MeanShift = metric_meanshift
)

hasil_metrics
##             Silhouette Dunn_Index Calinski_Harabasz
## KMeans       0.1630417 0.07016222          357.6936
## KMedian      0.1660960 0.05098379          347.4925
## DBSCAN       0.8591816 2.25288476          255.8268
## FuzzyCMeans  0.1605009 0.01541215          340.3827
## MeanShift    0.2639706 0.98337648          154.4929

NORMALISASI & SKOR

normalize <- function(x) {
  (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

metrics_norm <- as.data.frame(hasil_metrics)

metrics_norm$Silhouette <- normalize(metrics_norm$Silhouette)
metrics_norm$Dunn_Index <- normalize(metrics_norm$Dunn_Index)
metrics_norm$Calinski_Harabasz <- normalize(metrics_norm$Calinski_Harabasz)

# hitung score (fix NA)
metrics_norm$Score <- rowMeans(metrics_norm[,1:3], na.rm = TRUE)
metrics_norm
##              Silhouette Dunn_Index Calinski_Harabasz     Score
## KMeans      0.003636447 0.02446961         1.0000000 0.3427020
## KMedian     0.008008079 0.01589814         0.9497979 0.3245680
## DBSCAN      1.000000000 1.00000000         0.4986889 0.8328963
## FuzzyCMeans 0.000000000 0.00000000         0.9148090 0.3049363
## MeanShift   0.148092978 0.43261505         0.0000000 0.1935693
# metode terbaik
metrics_model <- metrics_norm[rownames(metrics_norm) != "DBSCAN", ]
best_method <- rownames(metrics_model)[which.max(metrics_model$Score)]
best_method
## [1] "KMeans"

ADE (K-MEANS)

# gunakan K-Means sebagai metode terbaik
data$cluster_kmeans <- km_res$cluster

# jumlah data tiap cluster
table(data$cluster_kmeans)
## 
##   1   2 
## 961 497
# rata-rata tiap variabel
cluster_summary <- aggregate(data_num,
                             by = list(Cluster = data$cluster_kmeans),
                             mean)

cluster_summary
##   Cluster pf_rol_procedural pf_rol_civil pf_rol_criminal   pf_rol
## 1       1          4.845626     5.045163        4.511896 4.510878
## 2       2          7.027429     6.305458        6.073082 6.854129
##   pf_ss_homicide pf_ss_disappearances_disap pf_ss_disappearances_violent
## 1       6.693057                   7.588292                     9.322524
## 2       8.805025                   9.798947                     9.900251
##   pf_ss_disappearances_organized pf_ss_disappearances_fatalities
## 1                       5.792661                        9.396583
## 2                       8.668202                        9.949240
##   pf_ss_disappearances_injuries pf_ss_disappearances pf_ss_women_fgm
## 1                      9.449741             8.355005        8.925838
## 2                      9.886827             9.694394        9.839389
##   pf_ss_women_missing pf_ss_women_inheritance_widows
## 1            8.825506                       5.265587
## 2            9.705746                       8.213446
##   pf_ss_women_inheritance_daughters pf_ss_women_inheritance pf_ss_women
## 1                          5.219674                5.262046    7.645383
## 2                          8.270232                9.291357    9.658766
##      pf_ss pf_movement_domestic pf_movement_foreign pf_movement_women
## 1 7.557016             6.790319            6.891994          7.149194
## 2 9.387792             9.465432            9.193336          9.776541
##   pf_movement pf_religion_estop_establish pf_religion_estop_operate
## 1    6.940738                    7.075930                  7.044008
## 2    9.530516                    8.390598                  8.368256
##   pf_religion_estop pf_religion_harassment pf_religion_restrictions pf_religion
## 1          6.703916               8.760943                 7.255180    7.606956
## 2          8.888819               8.821350                 7.380952    8.393694
##   pf_association_association pf_association_assembly
## 1                   6.943569                6.484547
## 2                   9.530830                9.223129
##   pf_association_political_establish pf_association_political_operate
## 1                           6.954442                         6.003067
## 2                           8.544804                         7.906089
##   pf_association_political pf_association_prof_establish
## 1                 5.882065                      6.962343
## 2                 8.914576                      8.317678
##   pf_association_prof_operate pf_association_prof
## 1                    6.359428            6.094223
## 2                    8.118228            8.758773
##   pf_association_sport_establish pf_association_sport_operate
## 1                       7.958973                     6.835770
## 2                       9.053835                     8.222789
##   pf_association_sport pf_association pf_expression_killed pf_expression_jailed
## 1             7.091031       6.533754             8.972414             9.543025
## 2             9.181820       9.146387             9.730306             9.876865
##   pf_expression_influence pf_expression_control pf_expression_cable
## 1                4.061072              4.265798            8.653685
## 2                7.403085              7.178068            9.784140
##   pf_expression_newspapers pf_expression_internet pf_expression
## 1                 7.974229               8.003153      7.201873
## 2                 9.774378               9.625704      9.031542
##   pf_identity_legal pf_identity_parental_marriage pf_identity_parental_divorce
## 1          6.194092                      6.388952                     6.909644
## 2          6.526250                      8.909361                     9.030859
##   pf_identity_parental pf_identity_sex_male pf_identity_sex_female
## 1             6.217965             5.345322               6.956122
## 2             9.768323             9.346076               9.828974
##   pf_identity_sex pf_identity_divorce pf_identity pf_score   pf_rank
## 1        6.156157            7.086609    6.187549 6.457756 101.71666
## 2        9.587525            8.412165    9.551308 8.638953  29.85714
##   ef_government_consumption ef_government_transfers ef_government_enterprises
## 1                  6.114517                8.519894                  5.221054
## 2                  4.711492                5.890471                  8.054326
##   ef_government_tax_income ef_government_tax_payroll ef_government_tax
## 1                 7.697030                  5.892059          6.849640
## 2                 6.855131                  4.492616          5.682093
##   ef_government ef_legal_judicial ef_legal_courts ef_legal_protection
## 1      6.651189          4.191917        4.035450            4.970015
## 2      6.078397          6.265077        5.097239            6.704441
##   ef_legal_military ef_legal_integrity ef_legal_enforcement
## 1          5.278449           5.420906             3.941650
## 2          8.618120           7.639804             5.327186
##   ef_legal_restrictions ef_legal_police ef_legal_crime ef_legal_gender ef_legal
## 1              6.985076        4.826071       5.390513       0.8418501 4.517214
## 2              8.056019        6.852256       6.920850       0.9743559 6.754243
##   ef_money_growth ef_money_sd ef_money_inflation ef_money_currency ef_money
## 1        8.446402    7.821512           8.570999          5.330547 7.557004
## 2        8.922866    9.261469           9.471212          9.225352 9.220225
##   ef_trade_tariffs_revenue ef_trade_tariffs_mean ef_trade_tariffs_sd
## 1                 7.583936              7.871953            5.841810
## 2                 9.391222              8.927002            6.111363
##   ef_trade_tariffs ef_trade_regulatory_nontariff ef_trade_regulatory_compliance
## 1         7.081680                      5.361596                       5.648017
## 2         8.143196                      6.445521                       8.574304
##   ef_trade_regulatory ef_trade_black ef_trade_movement_foreign
## 1            5.449922       9.649387                  5.550153
## 2            7.513402       9.995033                  6.664347
##   ef_trade_movement_capital ef_trade_movement_visit ef_trade_movement ef_trade
## 1                  2.749954                4.410645          4.121357 6.563232
## 2                  5.196104                7.174246          6.355402 8.001758
##   ef_regulation_credit_ownership ef_regulation_credit_private
## 1                       7.091753                     7.663644
## 2                       8.875652                     8.353129
##   ef_regulation_credit_interest ef_regulation_credit
## 1                      9.082272             7.950375
## 2                      9.810865             9.016234
##   ef_regulation_labor_minwage ef_regulation_labor_firing
## 1                    6.388581                   4.765343
## 2                    6.557600                   4.694122
##   ef_regulation_labor_bargain ef_regulation_labor_hours
## 1                    6.488190                  8.016037
## 2                    6.337891                  7.857642
##   ef_regulation_labor_dismissal ef_regulation_labor_conscription
## 1                      5.753854                         5.994736
## 2                      7.824195                         7.637827
##   ef_regulation_labor ef_regulation_business_adm
## 1            6.240481                   3.929901
## 2            6.824742                   4.051716
##   ef_regulation_business_bureaucracy ef_regulation_business_start
## 1                           4.314605                     8.511451
## 2                           7.120332                     9.514582
##   ef_regulation_business_bribes ef_regulation_business_licensing
## 1                      4.192509                         7.423229
## 2                      6.227498                         8.230747
##   ef_regulation_business_compliance ef_regulation_business ef_regulation
## 1                          6.611650               5.894743      6.689298
## 2                          7.697693               7.135437      7.658804
##   ef_score  ef_rank hf_score   hf_rank hf_quartile
## 1 6.394015 98.81254 6.425886 102.17548    3.108476
## 2 7.542797 34.74447 8.090875  28.34406    1.295775
# variabel paling membedakan
variansi_cluster <- apply(cluster_summary[,-1], 2, var)
sort(variansi_cluster, decreasing = TRUE)
##                            hf_rank                            pf_rank 
##                       2.725539e+03                       2.581895e+03 
##                            ef_rank            pf_ss_women_inheritance 
##                       2.052359e+03                       8.117674e+00 
##               pf_identity_sex_male                  ef_money_currency 
##                       8.003018e+00                       7.584752e+00 
##               pf_identity_parental                    pf_identity_sex 
##                       6.302522e+00                       5.887144e+00 
##                        pf_identity            pf_expression_influence 
##                       5.657437e+00                       5.584527e+00 
##                  ef_legal_military  pf_ss_women_inheritance_daughters 
##                       5.576703e+00                       4.652952e+00 
##           pf_association_political     pf_ss_women_inheritance_widows 
##                       4.598063e+00                       4.344938e+00 
##     ef_trade_regulatory_compliance              pf_expression_control 
##                       4.281578e+00                       4.240659e+00 
##     pf_ss_disappearances_organized             pf_identity_sex_female 
##                       4.134366e+00                       4.126639e+00 
##          ef_government_enterprises ef_regulation_business_bureaucracy 
##                       4.013715e+00                       3.936052e+00 
##            ef_trade_movement_visit            pf_association_assembly 
##                       3.818746e+00                       3.749915e+00 
##               pf_movement_domestic                pf_association_prof 
##                       3.578116e+00                       3.549914e+00 
##            ef_government_transfers                  pf_movement_women 
##                       3.456932e+00                       3.451474e+00 
##                     pf_association                        pf_movement 
##                       3.412926e+00                       3.353475e+00 
##         pf_association_association      pf_identity_parental_marriage 
##                       3.346958e+00                       3.176229e+00 
##          ef_trade_movement_capital                             pf_rol 
##                       2.991826e+00                       2.745412e+00 
##                pf_movement_foreign                           ef_legal 
##                       2.648089e+00                       2.502150e+00 
##                  ef_trade_movement                 ef_legal_integrity 
##                       2.495480e+00                       2.461755e+00 
##         pf_ss_disappearances_disap                  pf_religion_estop 
##                       2.443498e+00                       2.386900e+00 
##                  pf_rol_procedural                           pf_score 
##                       2.380131e+00                       2.378809e+00 
##       pf_identity_parental_divorce                     pf_ss_homicide 
##                       2.249777e+00                       2.230203e+00 
##               pf_association_sport                  ef_legal_judicial 
##                       2.185698e+00                       2.148995e+00 
##      ef_regulation_labor_dismissal                ef_trade_regulatory 
##                       2.143157e+00                       2.128974e+00 
##      ef_regulation_business_bribes                    ef_legal_police 
##                       2.070591e+00                       2.052713e+00 
##                        pf_ss_women   pf_association_political_operate 
##                       2.026856e+00                       1.810745e+00 
##                              pf_ss                      pf_expression 
##                       1.675871e+00                       1.673843e+00 
##                        hf_quartile           ef_trade_tariffs_revenue 
##                       1.642943e+00                       1.633141e+00 
##           pf_expression_newspapers     ef_regulation_credit_ownership 
##                       1.620268e+00                       1.591149e+00 
##        pf_association_prof_operate                ef_legal_protection 
##                       1.546689e+00                       1.504118e+00 
##                           hf_score                           ef_money 
##                       1.386094e+00                       1.383152e+00 
##   ef_regulation_labor_conscription             pf_expression_internet 
##                       1.349874e+00                       1.316336e+00 
## pf_association_political_establish                    pf_rol_criminal 
##                       1.264626e+00                       1.218651e+00 
##                     ef_legal_crime                        ef_money_sd 
##                       1.170964e+00                       1.036739e+00 
##                           ef_trade          ef_government_consumption 
##                       1.034679e+00                       9.842396e-01 
##          ef_government_tax_payroll       pf_association_sport_operate 
##                       9.792208e-01                       9.619116e-01 
##               ef_legal_enforcement      pf_association_prof_establish 
##                       9.598545e-01                       9.184662e-01 
##               pf_ss_disappearances                pf_identity_divorce 
##                       8.969818e-01                       8.785504e-01 
##          pf_religion_estop_operate        pf_religion_estop_establish 
##                       8.768169e-01                       8.641756e-01 
##                       pf_rol_civil             ef_regulation_business 
##                       7.941722e-01                       7.696611e-01 
##                  ef_government_tax                           ef_score 
##                       6.815841e-01                       6.598494e-01 
##                pf_expression_cable          ef_trade_movement_foreign 
##                       6.389634e-01                       6.207139e-01 
##     pf_association_sport_establish  ef_regulation_business_compliance 
##                       5.993612e-01                       5.897445e-01 
##      ef_trade_regulatory_nontariff              ef_legal_restrictions 
##                       5.874465e-01                       5.734589e-01 
##               ef_regulation_credit                    ef_legal_courts 
##                       5.680276e-01                       5.636988e-01 
##                   ef_trade_tariffs              ef_trade_tariffs_mean 
##                       5.634072e-01                       5.565642e-01 
##       ef_regulation_business_start                      ef_regulation 
##                       5.031361e-01                       4.699710e-01 
##                    pf_ss_women_fgm                 ef_money_inflation 
##                       4.172873e-01                       4.051916e-01 
##                pf_ss_women_missing           ef_government_tax_income 
##                       3.874106e-01                       3.543969e-01 
##   ef_regulation_business_licensing                        pf_religion 
##                       3.260428e-01                       3.094778e-01 
##               pf_expression_killed      ef_regulation_credit_interest 
##                       2.872000e-01                       2.654241e-01 
##       ef_regulation_credit_private                ef_regulation_labor 
##                       2.376946e-01                       1.706810e-01 
##       pf_ss_disappearances_violent                      ef_government 
##                       1.668845e-01                       1.640453e-01 
##    pf_ss_disappearances_fatalities                    ef_money_growth 
##                       1.527149e-01                       1.135090e-01 
##      pf_ss_disappearances_injuries                     ef_trade_black 
##                       9.552213e-02                       5.973555e-02 
##               pf_expression_jailed                  pf_identity_legal 
##                       5.572448e-02                       5.516469e-02 
##                ef_trade_tariffs_sd        ef_regulation_labor_minwage 
##                       3.632918e-02                       1.428365e-02 
##          ef_regulation_labor_hours        ef_regulation_labor_bargain 
##                       1.254448e-02                       1.129481e-02 
##                    ef_legal_gender           pf_religion_restrictions 
##                       8.778901e-03                       7.909330e-03 
##         ef_regulation_business_adm         ef_regulation_labor_firing 
##                       7.419422e-03                       2.536180e-03 
##             pf_religion_harassment 
##                       1.824506e-03

VISUALISASI ADE

par(mfrow = c(1,1))
boxplot(data_num$hf_score ~ data$cluster_kmeans,
        main = "Human Freedom Score")

cluster_count <- table(data$cluster_kmeans)

pie(cluster_count,
    main = "Distribusi Cluster")

pca <- prcomp(df)

plot(pca$x[,1:2],
     col = data$cluster_kmeans,
     pch = 19,
     main = "Clustering K-Means (PCA)",
     xlab = "PC1",
     ylab = "PC2")

par(mfrow = c(1,3))

boxplot(data_num$hf_score ~ data$cluster_kmeans,
        main = "HF Score")

boxplot(data_num$pf_score ~ data$cluster_kmeans,
        main = "Personal Freedom")

boxplot(data_num$ef_score ~ data$cluster_kmeans,
        main = "Economic Freedom")

# rata-rata hf_score per cluster
avg_hf <- tapply(data_num$hf_score, data$cluster_kmeans, mean)

barplot(avg_hf,
        main = "Rata-rata Human Freedom per Cluster",
        xlab = "Cluster",
        ylab = "HF Score")