Load Library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cluster)
library(factoextra)
## Welcome to factoextra!
## Want to learn more? See two factoextra-related books at https://www.datanovia.com/en/product/practical-guide-to-principal-component-methods-in-r/
library(dbscan)
##
## Attaching package: 'dbscan'
##
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(e1071)
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:ggplot2':
##
## element
library(meanShiftR)
Load Data
df <- read_csv("Tugas Sem 4/Global_Mental_Health_Crisis_Index_2026.csv")
## Rows: 92 Columns: 29
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): country, iso3, region, income_group, mh_policy_exists, mh_law_exis...
## dbl (21): depression_pct, anxiety_pct, suicide_rate_per100k, psychiatrists_p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
str(df)
## spc_tbl_ [92 × 29] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ country : chr [1:92] "Malawi" "Mozambique" "Ethiopia" "Zimbabwe" ...
## $ iso3 : chr [1:92] "MWI" "MOZ" "ETH" "ZWE" ...
## $ region : chr [1:92] "Africa" "Africa" "Africa" "Africa" ...
## $ income_group : chr [1:92] "Low" "Low" "Low" "Low" ...
## $ depression_pct : num [1:92] 3.3 3.4 3.5 4 3.6 3.5 4.8 4.5 6.3 3.9 ...
## $ anxiety_pct : num [1:92] 4.5 4.6 4.8 5.2 4.9 4.8 5.8 5.5 7.1 5.2 ...
## $ suicide_rate_per100k : num [1:92] 6.5 8.2 10.2 15.4 8.6 12 87.5 40.5 18.5 9 ...
## $ psychiatrists_per100k : num [1:92] 0.02 0.04 0.04 0.05 0.04 0.08 0.05 0.08 3.1 0.1 ...
## $ mh_budget_pct_health : num [1:92] 0.8 0.6 1 0.7 0.8 1 1.1 1 2.8 0.9 ...
## $ mh_spend_usd_per_capita : num [1:92] 0.1 0.1 0.1 0.1 0.1 0.1 0.2 0.2 8 0.1 ...
## $ treatment_gap_pct : num [1:92] 97 96 95 94 94 93 93 92 62 90 ...
## $ social_media_hours_daily : num [1:92] 1.1 1.2 1.4 2.1 1.6 1.8 1.3 1.4 2.8 4.8 ...
## $ internet_penetration_pct : num [1:92] 16 18 22 35 28 24 29 41 72 55 ...
## $ gdp_per_capita_usd : num [1:92] 700 600 1200 1000 1200 ...
## $ population_millions : num [1:92] 20.4 32.8 123.4 15.9 65.5 ...
## $ covid_mh_increase_pct : num [1:92] 86 85 82 82 80 80 71 72 45 72 ...
## $ youth_mh_crisis_score : num [1:92] 9.5 9.4 9.3 9.3 9.2 9.1 9.2 9.1 8.5 9.1 ...
## $ mh_policy_exists : chr [1:92] "No" "No" "No" "No" ...
## $ mh_law_exists : chr [1:92] "No" "No" "No" "No" ...
## $ mh_crisis_index : num [1:92] 87 86 84 83 82 81 80 79 78 78 ...
## $ total_affected_millions : num [1:92] 1.59 2.62 10.24 1.46 5.57 ...
## $ psychiatrists_per_million : num [1:92] 0.2 0.4 0.4 0.5 0.4 0.8 0.5 0.8 31 1 ...
## $ mh_investment_gap : num [1:92] 0.078 0.058 0.095 0.066 0.075 0.093 0.102 0.092 0.174 0.081 ...
## $ social_media_mental_health_risk : chr [1:92] "Low (<1.5h)" "Low (<1.5h)" "Low (<1.5h)" "Moderate (1.5-2.5h)" ...
## $ depression_anxiety_comorbidity_est_pct: num [1:92] 1.65 1.7 1.75 2 1.8 1.75 2.4 2.25 3.15 1.95 ...
## $ mh_system_score : num [1:92] 3.4 3.1 4.7 4.1 4.4 5.4 5.6 5.7 24.2 6 ...
## $ income_group_code : num [1:92] 1 1 1 1 1 1 2 2 3 2 ...
## $ data_year : num [1:92] 2025 2025 2025 2025 2025 ...
## $ data_source : chr [1:92] "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" "WHO Mental Health Atlas 2024 | GBD Study 2023 (IHME) | Our World in Data (Mar 2026) | OECD 2024 | DataReportal 2025" ...
## - attr(*, "spec")=
## .. cols(
## .. country = col_character(),
## .. iso3 = col_character(),
## .. region = col_character(),
## .. income_group = col_character(),
## .. depression_pct = col_double(),
## .. anxiety_pct = col_double(),
## .. suicide_rate_per100k = col_double(),
## .. psychiatrists_per100k = col_double(),
## .. mh_budget_pct_health = col_double(),
## .. mh_spend_usd_per_capita = col_double(),
## .. treatment_gap_pct = col_double(),
## .. social_media_hours_daily = col_double(),
## .. internet_penetration_pct = col_double(),
## .. gdp_per_capita_usd = col_double(),
## .. population_millions = col_double(),
## .. covid_mh_increase_pct = col_double(),
## .. youth_mh_crisis_score = col_double(),
## .. mh_policy_exists = col_character(),
## .. mh_law_exists = col_character(),
## .. mh_crisis_index = col_double(),
## .. total_affected_millions = col_double(),
## .. psychiatrists_per_million = col_double(),
## .. mh_investment_gap = col_double(),
## .. social_media_mental_health_risk = col_character(),
## .. depression_anxiety_comorbidity_est_pct = col_double(),
## .. mh_system_score = col_double(),
## .. income_group_code = col_double(),
## .. data_year = col_double(),
## .. data_source = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
Ambil data Numerik
data <- df %>%
select(where(is.numeric))
Bersihkan data
data <- na.omit(data)
data <- data[apply(data, 1, function(x) all(is.finite(x))), ]
data <- data[, apply(data, 2, var) != 0]
Standarisasi
data_scaled <- scale(data)
Cek data
sum(is.na(data_scaled))
## [1] 0
sum(is.infinite(data_scaled))
## [1] 0
summary(data)
## depression_pct anxiety_pct suicide_rate_per100k psychiatrists_per100k
## Min. :2.900 Min. :3.800 Min. : 3.200 Min. : 0.020
## 1st Qu.:3.800 1st Qu.:4.800 1st Qu.: 7.575 1st Qu.: 0.500
## Median :4.400 Median :5.200 Median : 9.800 Median : 3.150
## Mean :4.402 Mean :5.633 Mean :11.560 Mean : 7.182
## 3rd Qu.:4.900 3rd Qu.:6.100 3rd Qu.:12.825 3rd Qu.:12.450
## Max. :6.300 Max. :9.300 Max. :87.500 Max. :54.900
## mh_budget_pct_health mh_spend_usd_per_capita treatment_gap_pct
## Min. : 0.600 Min. : 0.100 Min. :17.00
## 1st Qu.: 1.200 1st Qu.: 0.725 1st Qu.:26.00
## Median : 2.850 Median : 8.000 Median :64.50
## Mean : 3.759 Mean : 38.951 Mean :58.93
## 3rd Qu.: 5.650 3rd Qu.: 49.750 3rd Qu.:84.50
## Max. :11.300 Max. :320.000 Max. :97.00
## social_media_hours_daily internet_penetration_pct gdp_per_capita_usd
## Min. :0.10 Min. : 8.00 Min. : 600
## 1st Qu.:1.60 1st Qu.:68.75 1st Qu.: 3975
## Median :2.10 Median :82.50 Median : 10150
## Mean :2.21 Mean :73.75 Mean : 22232
## 3rd Qu.:2.80 3rd Qu.:92.25 3rd Qu.: 33975
## Max. :4.80 Max. :99.00 Max. :106900
## population_millions covid_mh_increase_pct youth_mh_crisis_score
## Min. : 0.700 Min. :26.00 Min. :5.100
## 1st Qu.: 9.475 1st Qu.:34.75 1st Qu.:6.300
## Median : 24.750 Median :48.00 Median :7.500
## Mean : 76.966 Mean :49.63 Mean :7.411
## 3rd Qu.: 59.375 3rd Qu.:62.00 3rd Qu.:8.400
## Max. :1428.600 Max. :86.00 Max. :9.500
## mh_crisis_index total_affected_millions psychiatrists_per_million
## Min. :36.00 Min. : 0.0500 Min. : 0.20
## 1st Qu.:45.75 1st Qu.: 0.8575 1st Qu.: 5.00
## Median :58.00 Median : 2.2250 Median : 31.50
## Mean :58.29 Mean : 7.3409 Mean : 71.82
## 3rd Qu.:69.00 3rd Qu.: 5.9775 3rd Qu.:124.50
## Max. :87.00 Max. :121.4300 Max. :549.00
## mh_investment_gap depression_anxiety_comorbidity_est_pct mh_system_score
## Min. :0.0580 Min. :1.450 Min. : 3.100
## 1st Qu.:0.1060 1st Qu.:1.900 1st Qu.: 9.025
## Median :0.1445 Median :2.200 Median : 23.700
## Mean :0.1464 Mean :2.201 Mean : 32.959
## 3rd Qu.:0.1790 3rd Qu.:2.450 3rd Qu.: 55.875
## Max. :0.2440 Max. :3.150 Max. :100.000
## income_group_code
## Min. :1.000
## 1st Qu.:2.000
## Median :3.000
## Mean :2.924
## 3rd Qu.:4.000
## Max. :4.000
Menetukan Jumlah Clustering Elbow Method
fviz_nbclust(data_scaled, kmeans, method = "wss") +
ggtitle("Elbow Method")
Silhouette Method
fviz_nbclust(data_scaled, kmeans, method = "silhouette") +
ggtitle("Silhouette Method")
CLUSTERING 1. K-Means
set.seed(123)
kmeans_res <- kmeans(data_scaled, centers = 3, nstart = 25)
kmedian_res <- pam(data_scaled, k = 3)
db_res <- dbscan(data_scaled, 2, 5)
table(db_res$cluster)
##
## 0 1 2
## 25 34 33
ms_res <- meanShift(data_scaled)
fcm_res <- cmeans(data_scaled, centers = 3)
Evaluasi
dist_mat <- dist(data_scaled)
safe_silhouette <- function(cluster, dist_mat){
if(length(unique(cluster)) > 1){
sil <- silhouette(cluster, dist_mat)
return(mean(sil[,3]))
} else {
return(NA)
}
}
score_kmeans <- safe_silhouette(kmeans_res$cluster, dist_mat)
score_kmedian <- safe_silhouette(kmedian_res$clustering, dist_mat)
# DBSCAN
cluster_db <- db_res$cluster
if(length(unique(cluster_db[cluster_db != 0])) > 1){
data_db <- data_scaled[cluster_db != 0, ]
dist_db <- dist(data_db)
score_db <- mean(silhouette(cluster_db[cluster_db != 0], dist_db)[,3])
} else {
score_db <- NA
}
# Mean Shift
score_ms <- safe_silhouette(ms_res$assignment, dist_mat)
# Fuzzy
cluster_fcm <- apply(fcm_res$membership, 1, which.max)
score_fcm <- safe_silhouette(cluster_fcm, dist_mat)
Hasil Evaluasi
results <- data.frame(
Method = c("KMeans","KMedian","DBSCAN","MeanShift","FuzzyCMeans"),
Silhouette = c(score_kmeans,
score_kmedian,
score_db,
score_ms,
score_fcm)
)
print(results)
## Method Silhouette
## 1 KMeans 0.29477900
## 2 KMedian 0.27343687
## 3 DBSCAN 0.40980347
## 4 MeanShift -0.01533905
## 5 FuzzyCMeans 0.28688821
Metode Terbaik
best_method <- results[which.max(results$Silhouette), ]
print(best_method)
## Method Silhouette
## 3 DBSCAN 0.4098035
ANALISIS DATA EKSPLORATIF Pakai K-Means
df$cluster <- kmeans_res$cluster
df %>%
group_by(cluster) %>%
summarise(across(where(is.numeric), \(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 × 22
## cluster depression_pct anxiety_pct suicide_rate_per100k psychiatrists_per100k
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 1 3.71 4.82 13.0 0.276
## 2 2 4.88 5.60 12.9 17.9
## 3 3 4.5 6.25 9.4 3.27
## # ℹ 17 more variables: mh_budget_pct_health <dbl>,
## # mh_spend_usd_per_capita <dbl>, treatment_gap_pct <dbl>,
## # social_media_hours_daily <dbl>, internet_penetration_pct <dbl>,
## # gdp_per_capita_usd <dbl>, population_millions <dbl>,
## # covid_mh_increase_pct <dbl>, youth_mh_crisis_score <dbl>,
## # mh_crisis_index <dbl>, total_affected_millions <dbl>,
## # psychiatrists_per_million <dbl>, mh_investment_gap <dbl>, …
Visualisasi
pca_res <- prcomp(data_scaled)
pca_data <- data.frame(pca_res$x[,1:2])
pca_data$cluster <- factor(kmeans_res$cluster)
ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
geom_point(size = 3) +
ggtitle("Visualisasi Cluster dengan K-Means") +
theme_minimal()
pca_data$cluster <- factor(kmedian_res$clustering)
ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
geom_point(size = 3) +
ggtitle("Visualisasi Clustering K-Median") +
theme_minimal()
pca_data$cluster <- factor(db_res$cluster)
ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
geom_point(size = 3) +
ggtitle("Visualisasi Clustering DBSCAN") +
theme_minimal()
pca_data$cluster <- factor(ms_res$assignment)
ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
geom_point(size = 3) +
ggtitle("Visualisasi Clustering Mean Shift") +
theme_minimal()
cluster_fcm <- apply(fcm_res$membership, 1, which.max)
pca_data$cluster <- factor(cluster_fcm)
ggplot(pca_data, aes(PC1, PC2, color = cluster)) +
geom_point(size = 3) +
ggtitle("Visualisasi Clustering Fuzzy C-Means") +
theme_minimal()
ggplot(df, aes(x = anxiety_pct, y = depression_pct, color = factor(cluster))) +
geom_point(size = 3) +
theme_minimal()