Load Library

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cluster)
library(factoextra)
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(dbscan)
## 
## Attaching package: 'dbscan'
## 
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(e1071)
## 
## Attaching package: 'e1071'
## 
## The following object is masked from 'package:ggplot2':
## 
##     element
library(meanShiftR)

Load Data

df <- read_csv("Tugas Sem 4/Global_Mental_Health_Crisis_Index_2026.csv")
## Rows: 92 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (8): country, iso3, region, income_group, mh_policy_exists, mh_law_exis...
## dbl (21): depression_pct, anxiety_pct, suicide_rate_per100k, psychiatrists_p...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(df)
## spc_tbl_ [92 × 29] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ country                               : chr [1:92] "Malawi" "Mozambique" "Ethiopia" "Zimbabwe" ...
##  $ iso3                                  : chr [1:92] "MWI" "MOZ" "ETH" "ZWE" ...
##  $ region                                : chr [1:92] "Africa" "Africa" "Africa" "Africa" ...
##  $ income_group                          : chr [1:92] "Low" "Low" "Low" "Low" ...
##  $ depression_pct                        : num [1:92] 3.3 3.4 3.5 4 3.6 3.5 4.8 4.5 6.3 3.9 ...
##  $ anxiety_pct                           : num [1:92] 4.5 4.6 4.8 5.2 4.9 4.8 5.8 5.5 7.1 5.2 ...
##  $ suicide_rate_per100k                  : num [1:92] 6.5 8.2 10.2 15.4 8.6 12 87.5 40.5 18.5 9 ...
##  $ psychiatrists_per100k                 : num [1:92] 0.02 0.04 0.04 0.05 0.04 0.08 0.05 0.08 3.1 0.1 ...
##  $ mh_budget_pct_health                  : num [1:92] 0.8 0.6 1 0.7 0.8 1 1.1 1 2.8 0.9 ...
##  $ mh_spend_usd_per_capita               : num [1:92] 0.1 0.1 0.1 0.1 0.1 0.1 0.2 0.2 8 0.1 ...
##  $ treatment_gap_pct                     : num [1:92] 97 96 95 94 94 93 93 92 62 90 ...
##  $ social_media_hours_daily              : num [1:92] 1.1 1.2 1.4 2.1 1.6 1.8 1.3 1.4 2.8 4.8 ...
##  $ internet_penetration_pct              : num [1:92] 16 18 22 35 28 24 29 41 72 55 ...
##  $ gdp_per_capita_usd                    : num [1:92] 700 600 1200 1000 1200 ...
##  $ population_millions                   : num [1:92] 20.4 32.8 123.4 15.9 65.5 ...
##  $ covid_mh_increase_pct                 : num [1:92] 86 85 82 82 80 80 71 72 45 72 ...
##  $ youth_mh_crisis_score                 : num [1:92] 9.5 9.4 9.3 9.3 9.2 9.1 9.2 9.1 8.5 9.1 ...
##  $ mh_policy_exists                      : chr [1:92] "No" "No" "No" "No" ...
##  $ mh_law_exists                         : chr [1:92] "No" "No" "No" "No" ...
##  $ mh_crisis_index                       : num [1:92] 87 86 84 83 82 81 80 79 78 78 ...
##  $ total_affected_millions               : num [1:92] 1.59 2.62 10.24 1.46 5.57 ...
##  $ psychiatrists_per_million             : num [1:92] 0.2 0.4 0.4 0.5 0.4 0.8 0.5 0.8 31 1 ...
##  $ mh_investment_gap                     : num [1:92] 0.078 0.058 0.095 0.066 0.075 0.093 0.102 0.092 0.174 0.081 ...
##  $ social_media_mental_health_risk       : chr [1:92] "Low (<1.5h)" "Low (<1.5h)" "Low (<1.5h)" "Moderate (1.5-2.5h)" ...
##  $ depression_anxiety_comorbidity_est_pct: num [1:92] 1.65 1.7 1.75 2 1.8 1.75 2.4 2.25 3.15 1.95 ...
##  $ mh_system_score                       : num [1:92] 3.4 3.1 4.7 4.1 4.4 5.4 5.6 5.7 24.2 6 ...
##  $ income_group_code                     : num [1:92] 1 1 1 1 1 1 2 2 3 2 ...
##  $ data_year                             : num [1:92] 2025 2025 2025 2025 2025 ...
##  $ data_source                           : chr [1:92] "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   country = col_character(),
##   ..   iso3 = col_character(),
##   ..   region = col_character(),
##   ..   income_group = col_character(),
##   ..   depression_pct = col_double(),
##   ..   anxiety_pct = col_double(),
##   ..   suicide_rate_per100k = col_double(),
##   ..   psychiatrists_per100k = col_double(),
##   ..   mh_budget_pct_health = col_double(),
##   ..   mh_spend_usd_per_capita = col_double(),
##   ..   treatment_gap_pct = col_double(),
##   ..   social_media_hours_daily = col_double(),
##   ..   internet_penetration_pct = col_double(),
##   ..   gdp_per_capita_usd = col_double(),
##   ..   population_millions = col_double(),
##   ..   covid_mh_increase_pct = col_double(),
##   ..   youth_mh_crisis_score = col_double(),
##   ..   mh_policy_exists = col_character(),
##   ..   mh_law_exists = col_character(),
##   ..   mh_crisis_index = col_double(),
##   ..   total_affected_millions = col_double(),
##   ..   psychiatrists_per_million = col_double(),
##   ..   mh_investment_gap = col_double(),
##   ..   social_media_mental_health_risk = col_character(),
##   ..   depression_anxiety_comorbidity_est_pct = col_double(),
##   ..   mh_system_score = col_double(),
##   ..   income_group_code = col_double(),
##   ..   data_year = col_double(),
##   ..   data_source = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>

Ambil data Numerik

data <- df %>%
  select(where(is.numeric))

Bersihkan data

data <- na.omit(data)
data <- data[apply(data, 1, function(x) all(is.finite(x))), ]
data <- data[, apply(data, 2, var) != 0]

Standarisasi

data_scaled <- scale(data)

Cek data

sum(is.na(data_scaled))
## [1] 0
sum(is.infinite(data_scaled))
## [1] 0
summary(data)
##  depression_pct   anxiety_pct    suicide_rate_per100k psychiatrists_per100k
##  Min.   :2.900   Min.   :3.800   Min.   : 3.200       Min.   : 0.020       
##  1st Qu.:3.800   1st Qu.:4.800   1st Qu.: 7.575       1st Qu.: 0.500       
##  Median :4.400   Median :5.200   Median : 9.800       Median : 3.150       
##  Mean   :4.402   Mean   :5.633   Mean   :11.560       Mean   : 7.182       
##  3rd Qu.:4.900   3rd Qu.:6.100   3rd Qu.:12.825       3rd Qu.:12.450       
##  Max.   :6.300   Max.   :9.300   Max.   :87.500       Max.   :54.900       
##  mh_budget_pct_health mh_spend_usd_per_capita treatment_gap_pct
##  Min.   : 0.600       Min.   :  0.100         Min.   :17.00    
##  1st Qu.: 1.200       1st Qu.:  0.725         1st Qu.:26.00    
##  Median : 2.850       Median :  8.000         Median :64.50    
##  Mean   : 3.759       Mean   : 38.951         Mean   :58.93    
##  3rd Qu.: 5.650       3rd Qu.: 49.750         3rd Qu.:84.50    
##  Max.   :11.300       Max.   :320.000         Max.   :97.00    
##  social_media_hours_daily internet_penetration_pct gdp_per_capita_usd
##  Min.   :0.10             Min.   : 8.00            Min.   :   600    
##  1st Qu.:1.60             1st Qu.:68.75            1st Qu.:  3975    
##  Median :2.10             Median :82.50            Median : 10150    
##  Mean   :2.21             Mean   :73.75            Mean   : 22232    
##  3rd Qu.:2.80             3rd Qu.:92.25            3rd Qu.: 33975    
##  Max.   :4.80             Max.   :99.00            Max.   :106900    
##  population_millions covid_mh_increase_pct youth_mh_crisis_score
##  Min.   :   0.700    Min.   :26.00         Min.   :5.100        
##  1st Qu.:   9.475    1st Qu.:34.75         1st Qu.:6.300        
##  Median :  24.750    Median :48.00         Median :7.500        
##  Mean   :  76.966    Mean   :49.63         Mean   :7.411        
##  3rd Qu.:  59.375    3rd Qu.:62.00         3rd Qu.:8.400        
##  Max.   :1428.600    Max.   :86.00         Max.   :9.500        
##  mh_crisis_index total_affected_millions psychiatrists_per_million
##  Min.   :36.00   Min.   :  0.0500        Min.   :  0.20           
##  1st Qu.:45.75   1st Qu.:  0.8575        1st Qu.:  5.00           
##  Median :58.00   Median :  2.2250        Median : 31.50           
##  Mean   :58.29   Mean   :  7.3409        Mean   : 71.82           
##  3rd Qu.:69.00   3rd Qu.:  5.9775        3rd Qu.:124.50           
##  Max.   :87.00   Max.   :121.4300        Max.   :549.00           
##  mh_investment_gap depression_anxiety_comorbidity_est_pct mh_system_score  
##  Min.   :0.0580    Min.   :1.450                          Min.   :  3.100  
##  1st Qu.:0.1060    1st Qu.:1.900                          1st Qu.:  9.025  
##  Median :0.1445    Median :2.200                          Median : 23.700  
##  Mean   :0.1464    Mean   :2.201                          Mean   : 32.959  
##  3rd Qu.:0.1790    3rd Qu.:2.450                          3rd Qu.: 55.875  
##  Max.   :0.2440    Max.   :3.150                          Max.   :100.000  
##  income_group_code
##  Min.   :1.000    
##  1st Qu.:2.000    
##  Median :3.000    
##  Mean   :2.924    
##  3rd Qu.:4.000    
##  Max.   :4.000

Menetukan Jumlah Clustering Elbow Method

fviz_nbclust(data_scaled, kmeans, method = "wss") +
  ggtitle("Elbow Method")

Silhouette Method

fviz_nbclust(data_scaled, kmeans, method = "silhouette") +
  ggtitle("Silhouette Method")

CLUSTERING 1. K-Means

set.seed(123)
kmeans_res <- kmeans(data_scaled, centers = 3, nstart = 25)
  1. K-Median
kmedian_res <- pam(data_scaled, k = 3)
  1. DBSCAN
db_res <- dbscan(data_scaled, 2, 5)
table(db_res$cluster)
## 
##  0  1  2 
## 25 34 33
  1. Mean Shift
ms_res <- meanShift(data_scaled)
  1. Fuzzy C-Means
fcm_res <- cmeans(data_scaled, centers = 3)

Evaluasi

dist_mat <- dist(data_scaled)

safe_silhouette <- function(cluster, dist_mat){
  if(length(unique(cluster)) > 1){
    sil <- silhouette(cluster, dist_mat)
    return(mean(sil[,3]))
  } else {
    return(NA)
  }
}
score_kmeans <- safe_silhouette(kmeans_res$cluster, dist_mat)
score_kmedian <- safe_silhouette(kmedian_res$clustering, dist_mat)

# DBSCAN 
cluster_db <- db_res$cluster
if(length(unique(cluster_db[cluster_db != 0])) > 1){
  data_db <- data_scaled[cluster_db != 0, ]
  dist_db <- dist(data_db)
  score_db <- mean(silhouette(cluster_db[cluster_db != 0], dist_db)[,3])
} else {
  score_db <- NA
}

# Mean Shift
score_ms <- safe_silhouette(ms_res$assignment, dist_mat)

# Fuzzy
cluster_fcm <- apply(fcm_res$membership, 1, which.max)
score_fcm <- safe_silhouette(cluster_fcm, dist_mat)

Hasil Evaluasi

results <- data.frame(
  Method = c("KMeans","KMedian","DBSCAN","MeanShift","FuzzyCMeans"),
  Silhouette = c(score_kmeans,
                 score_kmedian,
                 score_db,
                 score_ms,
                 score_fcm)
)

print(results)
##        Method  Silhouette
## 1      KMeans  0.29477900
## 2     KMedian  0.27343687
## 3      DBSCAN  0.40980347
## 4   MeanShift -0.01533905
## 5 FuzzyCMeans  0.28688821

Metode Terbaik

best_method <- results[which.max(results$Silhouette), ]
print(best_method)
##   Method Silhouette
## 3 DBSCAN  0.4098035

ANALISIS DATA EKSPLORATIF Pakai K-Means

df$cluster <- kmeans_res$cluster
df %>%
  group_by(cluster) %>%
  summarise(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 × 22
##   cluster depression_pct anxiety_pct suicide_rate_per100k psychiatrists_per100k
##     <int>          <dbl>       <dbl>                <dbl>                 <dbl>
## 1       1           3.71        4.82                 13.0                 0.276
## 2       2           4.88        5.60                 12.9                17.9  
## 3       3           4.5         6.25                  9.4                 3.27 
## # ℹ 17 more variables: mh_budget_pct_health <dbl>,
## #   mh_spend_usd_per_capita <dbl>, treatment_gap_pct <dbl>,
## #   social_media_hours_daily <dbl>, internet_penetration_pct <dbl>,
## #   gdp_per_capita_usd <dbl>, population_millions <dbl>,
## #   covid_mh_increase_pct <dbl>, youth_mh_crisis_score <dbl>,
## #   mh_crisis_index <dbl>, total_affected_millions <dbl>,
## #   psychiatrists_per_million <dbl>, mh_investment_gap <dbl>, …

Visualisasi

pca_res <- prcomp(data_scaled)
pca_data <- data.frame(pca_res$x[,1:2])
pca_data$cluster <- factor(kmeans_res$cluster)

ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
  geom_point(size = 3) +
  ggtitle("Visualisasi Cluster dengan K-Means") + 
  theme_minimal()

pca_data$cluster <- factor(kmedian_res$clustering)

ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
  geom_point(size = 3) +
  ggtitle("Visualisasi Clustering K-Median") +
  theme_minimal()

pca_data$cluster <- factor(db_res$cluster)

ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
  geom_point(size = 3) +
  ggtitle("Visualisasi Clustering DBSCAN") +
  theme_minimal()

pca_data$cluster <- factor(ms_res$assignment)

ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
  geom_point(size = 3) +
  ggtitle("Visualisasi Clustering Mean Shift") +
  theme_minimal()

cluster_fcm <- apply(fcm_res$membership, 1, which.max)
pca_data$cluster <- factor(cluster_fcm)

ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
  geom_point(size = 3) +
  ggtitle("Visualisasi Clustering Fuzzy C-Means") +
  theme_minimal()

ggplot(df, aes(x = anxiety_pct, y = depression_pct, color = factor(cluster))) +
  geom_point(size = 3) +
  theme_minimal()