library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(psych)
library(corrplot)
## corrplot 0.95 loaded
# Import the global climate and health CSV as a tibble.
dataset <- read_csv(
  file = "Dataset_Iklim_dan_Kesehatan_Global_(2015-2025).csv"
)
## Rows: 14100 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): country_code, country_name, region, income_level
## dbl (25): record_id, year, month, week, latitude, longitude, population_mil...
## date (1): date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the imported data.
head(dataset)
## # A tibble: 6 × 30
## record_id country_code country_name region income_level date year month
## <dbl> <chr> <chr> <chr> <chr> <date> <dbl> <dbl>
## 1 1 USA United Stat… North… High 2015-01-04 2015 1
## 2 2 USA United Stat… North… High 2015-01-11 2015 1
## 3 3 USA United Stat… North… High 2015-01-18 2015 1
## 4 4 USA United Stat… North… High 2015-01-25 2015 1
## 5 5 USA United Stat… North… High 2015-02-01 2015 2
## 6 6 USA United Stat… North… High 2015-02-08 2015 2
## # ℹ 22 more variables: week <dbl>, latitude <dbl>, longitude <dbl>,
## # population_millions <dbl>, temperature_celsius <dbl>,
## # temp_anomaly_celsius <dbl>, precipitation_mm <dbl>, heat_wave_days <dbl>,
## # drought_indicator <dbl>, flood_indicator <dbl>,
## # extreme_weather_events <dbl>, pm25_ugm3 <dbl>, air_quality_index <dbl>,
## # respiratory_disease_rate <dbl>, cardio_mortality_rate <dbl>,
## # vector_disease_risk_score <dbl>, waterborne_disease_incidents <dbl>, …
# Report the number of rows and columns.
dim(dataset)
## [1] 14100 30
# Show the type and leading values of every column.
str(dataset)
## spc_tbl_ [14,100 × 30] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ record_id : num [1:14100] 1 2 3 4 5 6 7 8 9 10 ...
## $ country_code : chr [1:14100] "USA" "USA" "USA" "USA" ...
## $ country_name : chr [1:14100] "United States" "United States" "United States" "United States" ...
## $ region : chr [1:14100] "North America" "North America" "North America" "North America" ...
## $ income_level : chr [1:14100] "High" "High" "High" "High" ...
## $ date : Date[1:14100], format: "2015-01-04" "2015-01-11" ...
## $ year : num [1:14100] 2015 2015 2015 2015 2015 ...
## $ month : num [1:14100] 1 1 1 1 2 2 2 2 3 3 ...
## $ week : num [1:14100] 1 2 3 4 5 6 7 8 9 10 ...
## $ latitude : num [1:14100] 37.1 37.1 37.1 37.1 37.1 ...
## $ longitude : num [1:14100] -95.7 -95.7 -95.7 -95.7 -95.7 ...
## $ population_millions : num [1:14100] 331 331 331 331 331 331 331 331 331 331 ...
## $ temperature_celsius : num [1:14100] 4.59 3.13 3.99 6.43 9 6.44 6.86 8.34 9.58 8.25 ...
## $ temp_anomaly_celsius : num [1:14100] 0.76 -0.5 -0.14 -0.06 0.47 0.18 0.38 0.64 -0.83 0.71 ...
## $ precipitation_mm : num [1:14100] 75.7 97 74.1 87.7 75.8 ...
## $ heat_wave_days : num [1:14100] 0 0 0 0 1 0 0 1 0 1 ...
## $ drought_indicator : num [1:14100] 0 0 0 0 0 0 0 0 0 0 ...
## $ flood_indicator : num [1:14100] 0 0 0 0 0 0 0 0 0 0 ...
## $ extreme_weather_events : num [1:14100] 0 0 0 0 1 0 0 1 0 1 ...
## $ pm25_ugm3 : num [1:14100] 39 17.9 91.5 5.5 37.1 ...
## $ air_quality_index : num [1:14100] 82 6 137 -3 48 157 51 5 29 22 ...
## $ respiratory_disease_rate : num [1:14100] 69.4 70 66.9 47 61.3 80.2 70.3 48 80.3 59.1 ...
## $ cardio_mortality_rate : num [1:14100] 31.5 26.3 33.4 35 28.3 30.6 33.2 33.3 24.9 32.5 ...
## $ vector_disease_risk_score : num [1:14100] 6.6 5.2 1.3 6 1.4 0.7 5.9 6.4 5.9 0.3 ...
## $ waterborne_disease_incidents: num [1:14100] 16.2 11.4 19.5 9.7 22.6 23.7 25.4 17.7 18.3 18.8 ...
## $ heat_related_admissions : num [1:14100] 1.4 0 0 9 27.3 11.9 9.1 20.3 7.8 13.7 ...
## $ healthcare_access_index : num [1:14100] 77.3 83.6 84.7 84.3 83.6 78 81.7 86.7 84.9 87.1 ...
## $ gdp_per_capita_usd : num [1:14100] 63627 63627 63627 63627 63733 ...
## $ mental_health_index : num [1:14100] 71.2 70.6 63.4 68.1 69.1 70.1 68.5 59.4 75.7 71.9 ...
## $ food_security_index : num [1:14100] 90.2 94 100 96.4 100 97.1 100 95 100 95.2 ...
## - attr(*, "spec")=
## .. cols(
## .. record_id = col_double(),
## .. country_code = col_character(),
## .. country_name = col_character(),
## .. region = col_character(),
## .. income_level = col_character(),
## .. date = col_date(format = ""),
## .. year = col_double(),
## .. month = col_double(),
## .. week = col_double(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. population_millions = col_double(),
## .. temperature_celsius = col_double(),
## .. temp_anomaly_celsius = col_double(),
## .. precipitation_mm = col_double(),
## .. heat_wave_days = col_double(),
## .. drought_indicator = col_double(),
## .. flood_indicator = col_double(),
## .. extreme_weather_events = col_double(),
## .. pm25_ugm3 = col_double(),
## .. air_quality_index = col_double(),
## .. respiratory_disease_rate = col_double(),
## .. cardio_mortality_rate = col_double(),
## .. vector_disease_risk_score = col_double(),
## .. waterborne_disease_incidents = col_double(),
## .. heat_related_admissions = col_double(),
## .. healthcare_access_index = col_double(),
## .. gdp_per_capita_usd = col_double(),
## .. mental_health_index = col_double(),
## .. food_security_index = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Print per-column summary statistics.
summary(dataset)
## record_id country_code country_name region
## Min. : 1 Length:14100 Length:14100 Length:14100
## 1st Qu.: 3526 Class :character Class :character Class :character
## Median : 7050 Mode :character Mode :character Mode :character
## Mean : 7050
## 3rd Qu.:10575
## Max. :14100
## income_level date year month
## Length:14100 Min. :2015-01-04 Min. :2015 Min. : 1.000
## Class :character 1st Qu.:2017-09-15 1st Qu.:2017 1st Qu.: 3.000
## Mode :character Median :2020-05-27 Median :2020 Median : 6.000
## Mean :2020-05-27 Mean :2020 Mean : 6.438
## 3rd Qu.:2023-02-06 3rd Qu.:2023 3rd Qu.: 9.000
## Max. :2025-10-19 Max. :2025 Max. :12.000
## week latitude longitude population_millions
## Min. : 1.00 Min. :-38.41 Min. :-106.34 Min. : 26.0
## 1st Qu.:13.00 1st Qu.: 4.57 1st Qu.: -3.74 1st Qu.: 60.0
## Median :26.00 Median : 23.63 Median : 22.93 Median : 98.0
## Mean :26.22 Mean : 18.91 Mean : 27.35 Mean : 217.4
## 3rd Qu.:39.00 3rd Qu.: 37.09 3rd Qu.: 100.99 3rd Qu.: 206.0
## Max. :53.00 Max. : 56.13 Max. : 138.25 Max. :1411.0
## temperature_celsius temp_anomaly_celsius precipitation_mm heat_wave_days
## Min. :-20.7400 Min. :-1.7600 Min. : 0.00 Min. :0.0000
## 1st Qu.: 0.2575 1st Qu.:-0.2200 1st Qu.: 51.50 1st Qu.:0.0000
## Median : 8.6300 Median : 0.1200 Median : 80.90 Median :0.0000
## Mean : 8.6071 Mean : 0.1209 Mean : 81.15 Mean :0.1481
## 3rd Qu.: 16.8500 3rd Qu.: 0.4600 3rd Qu.:110.60 3rd Qu.:0.0000
## Max. : 38.3300 Max. : 2.1200 Max. :222.90 Max. :5.0000
## drought_indicator flood_indicator extreme_weather_events pm25_ugm3
## Min. :0.00000 Min. :0.000000 Min. :0.0000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.: 36.60
## Median :0.00000 Median :0.000000 Median :0.0000 Median : 61.20
## Mean :0.01156 Mean :0.005887 Mean :0.1655 Mean : 65.77
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.: 94.30
## Max. :1.00000 Max. :1.000000 Max. :5.0000 Max. :189.50
## air_quality_index respiratory_disease_rate cardio_mortality_rate
## Min. :-52.00 Min. : 21.20 Min. : 8.00
## 1st Qu.: 54.00 1st Qu.: 59.20 1st Qu.:27.00
## Median : 93.00 Median : 69.60 Median :30.80
## Mean : 98.75 Mean : 70.02 Mean :30.76
## 3rd Qu.:142.00 3rd Qu.: 80.50 3rd Qu.:34.50
## Max. :302.00 Max. :131.80 Max. :51.10
## vector_disease_risk_score waterborne_disease_incidents heat_related_admissions
## Min. : 0.0 Min. : 0.00 Min. : 0.000
## 1st Qu.: 3.5 1st Qu.:17.80 1st Qu.: 0.000
## Median : 5.8 Median :22.10 Median : 0.600
## Mean : 12.1 Mean :22.17 Mean : 6.881
## 3rd Qu.: 8.9 3rd Qu.:26.40 3rd Qu.:12.300
## Max. :100.0 Max. :63.00 Max. :59.600
## healthcare_access_index gdp_per_capita_usd mental_health_index
## Min. : 27.90 Min. : 1565 Min. :47.70
## 1st Qu.: 47.10 1st Qu.: 3548 1st Qu.:66.50
## Median : 66.60 Median :13941 Median :70.00
## Mean : 64.88 Mean :26331 Mean :69.87
## 3rd Qu.: 81.50 3rd Qu.:50207 3rd Qu.:73.30
## Max. :102.00 Max. :78630 Max. :88.70
## food_security_index
## Min. : 62.2
## 1st Qu.: 88.8
## Median : 94.4
## Mean : 93.3
## 3rd Qu.: 99.7
## Max. :100.0
# Count the total number of missing (NA) values in the dataset.
sum(is.na(dataset))
## [1] 0
Hasil pengecekan menunjukkan tidak terdapat missing value, sehingga data siap dianalisis lebih lanjut karena seluruh informasi pada setiap variabel telah terpenuhi.
# Build the analysis subset: keep only the 2025 records, then select the
# climate and health columns. The named vector maps each source column (value)
# to the Indonesian label (name) it is renamed to. Incomplete rows are dropped
# last so the PCA input contains no NA.
dataset2 <- dataset %>%
  filter(year == 2025) %>%
  select(all_of(c(
    Suhu             = "temperature_celsius",
    Anomali_Suhu     = "temp_anomaly_celsius",
    Curah_Hujan      = "precipitation_mm",
    Hari_Panas       = "heat_wave_days",
    Cuaca_Ekstrem    = "extreme_weather_events",
    Polusi_PM25      = "pm25_ugm3",
    Kualitas_Udara   = "air_quality_index",
    Penyakit_Napas   = "respiratory_disease_rate",
    Kematian_Jantung = "cardio_mortality_rate",
    Risiko_Vektor    = "vector_disease_risk_score",
    Penyakit_Air     = "waterborne_disease_incidents",
    Rawat_Inap_Panas = "heat_related_admissions"
  ))) %>%
  na.omit()
# Preview the first rows of the 2025 analysis subset.
head(dataset2)
## # A tibble: 6 × 12
## Suhu Anomali_Suhu Curah_Hujan Hari_Panas Cuaca_Ekstrem Polusi_PM25
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 5.38 1.24 96.6 0 0 48.8
## 2 5.22 1.01 137. 0 0 31.2
## 3 4.74 -0.76 123 0 0 27.8
## 4 3.67 0.37 129 0 0 23
## 5 4.15 0.54 97.6 0 0 20.8
## 6 8.09 -0.26 150. 2 2 10.5
## # ℹ 6 more variables: Kualitas_Udara <dbl>, Penyakit_Napas <dbl>,
## # Kematian_Jantung <dbl>, Risiko_Vektor <dbl>, Penyakit_Air <dbl>,
## # Rawat_Inap_Panas <dbl>
# Draw one horizontal boxplot per analysis variable on a 3 x 4 grid.
# par() returns the previous settings; they are restored after the loop so the
# grid layout and margins do not leak into the plots drawn later.
old_par <- par(mfrow = c(3, 4), mar = c(3, 3, 2, 1), mgp = c(1.6, 0.5, 0))
# seq_along() yields an empty sequence for zero columns, unlike 1:ncol().
for (i in seq_along(dataset2)) {
  boxplot(dataset2[[i]],
          main = colnames(dataset2)[i],
          col = "skyblue",
          border = "darkblue",
          horizontal = TRUE,
          cex.main = 0.8,
          cex.axis = 0.7)
}
par(old_par)
Berdasarkan gambar tersebut, sebagian besar variabel seperti Suhu, Curah
Hujan, dan Kualitas Udara memiliki sebaran data stabil dan simetris.
Namun, terdapat banyak outlier (titik ekstrem) pada variabel Hari Panas,
Risiko Vektor, dan Rawat Inap Panas. Hal ini menunjukkan bahwa pada
tahun 2025 terjadi lonjakan dampak kesehatan ekstrem di wilayah tertentu
yang jauh melampaui rata-rata dunia.
# Correlation matrix of the analysis variables, drawn as a colour-coded
# upper triangle with the coefficients printed in each cell.
korelasi <- cor(x = dataset2)
corrplot(
  corr = korelasi,
  type = "upper",
  method = "color",
  addCoef.col = "black",
  number.cex = 0.6,
  tl.col = "black",
  tl.srt = 45,
  main = "\n\nMatriks Korelasi"
)
Berdasarkan heatmap, variabel iklim dan kesehatan berhubungan erat;
warna biru tua menandakan jika satu variabel naik, yang lain cenderung
ikut naik. Misalnya, Hari Panas berkaitan sangat kuat dengan Cuaca
Ekstrem (0.90), dan Suhu dengan Penyakit Napas (0.68). Hubungan yang
kuat ini membuktikan data sangat layak dilanjutkan ke Analisis
Faktor.
# Kaiser-Meyer-Olkin measure of sampling adequacy (overall and per variable).
uji_kmo <- KMO(r = dataset2)
print(uji_kmo)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = dataset2)
## Overall MSA = 0.71
## MSA for each item =
## Suhu Anomali_Suhu Curah_Hujan Hari_Panas
## 0.70 0.54 0.78 0.63
## Cuaca_Ekstrem Polusi_PM25 Kualitas_Udara Penyakit_Napas
## 0.62 0.66 0.68 0.93
## Kematian_Jantung Risiko_Vektor Penyakit_Air Rawat_Inap_Panas
## 0.59 0.71 0.77 0.86
Hasil Uji KMO sebesar 0.71 menunjukkan bahwa dataset layak dan cukup untuk dianalisis lebih lanjut menggunakan Analisis Faktor. Nilai ini berada di atas standar minimal (0.50), sehingga korelasi antar variabel dianggap memadai untuk dikelompokkan ke dalam beberapa faktor utama.

### Uji Bartlett (Mengecek korelasi antar variabel)
# Bartlett's test of sphericity. The correlation matrix and the sample size
# are passed explicitly, so the test no longer relies on the fallback that
# prints "R was not square, finding R from data".
uji_bartlett <- cortest.bartlett(cor(dataset2), n = nrow(dataset2))
print(uji_bartlett)
## $chisq
## [1] 9522.79
##
## $p.value
## [1] 0
##
## $df
## [1] 66
Berdasarkan Uji Bartlett, nilai p-value yang ditampilkan adalah 0 (artinya p < 0,001 setelah pembulatan), sehingga matriks korelasi berbeda secara signifikan dari matriks identitas; dengan kata lain, terdapat korelasi yang bermakna antar variabel. Karena nilai p-value jauh di bawah 0,05, maka syarat Analisis Faktor terpenuhi dan proses bisa dilanjutkan karena data terbukti bukan sekumpulan variabel acak yang tidak berhubungan.

## Analisis Principal Component Analysis (PCA)

### Perhitungan PCA
# PCA on centred, unit-variance variables; summary() prints the standard
# deviation and the (cumulative) proportion of variance of each component.
hasil_pca <- prcomp(
  x = dataset2,
  scale. = TRUE,
  center = TRUE
)
summary(hasil_pca)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.7820 1.7622 1.2042 1.1531 0.8729 0.8183 0.77670
## Proportion of Variance 0.2646 0.2588 0.1208 0.1108 0.0635 0.0558 0.05027
## Cumulative Proportion 0.2646 0.5234 0.6442 0.7551 0.8186 0.8744 0.92463
## PC8 PC9 PC10 PC11 PC12
## Standard deviation 0.56302 0.53613 0.49125 0.17970 0.16228
## Proportion of Variance 0.02642 0.02395 0.02011 0.00269 0.00219
## Cumulative Proportion 0.95105 0.97500 0.99511 0.99781 1.00000
Hasil PCA menunjukkan bahwa 4 komponen pertama sudah merangkum 75,5% informasi data. Karena itu, cukup digunakan 4 faktor saja untuk menyederhanakan variabel dalam analisis ini.

### Visualisasi Scree Plot
# Scree plot: the eigenvalue (variance) of each principal component is the
# square of its standard deviation.
eigen_values <- (hasil_pca$sdev)^2
plot(
  x = eigen_values,
  type = "b",
  pch = 19,
  col = "deeppink",
  xlab = "Komponen Utama (PC)",
  ylab = "Eigenvalue (Varians)",
  main = "Scree Plot: Penentuan Faktor Utama"
)
# Dashed reference line at eigenvalue = 1.
abline(h = 1, lty = 2, col = "blue")
Scree plot menunjukkan ada 4 komponen yang memiliki nilai eigenvalue
lebih dari 1. Oleh karena itu, diputuskan untuk mengambil 4 faktor
sebagai penyederhanaan data, karena setelah titik keempat, grafik mulai
melandai secara signifikan.

# Struktur Komponen PCA
# Varimax-rotated principal component solution with 4 components.
# principal() extracts components, whereas fa(..., fm = "pa") fits a
# principal-axis *factor* model. The factor-analysis section already fits that
# model, so calling fa() here made the "PCA" diagram a duplicate of the FA
# result instead of showing the component structure.
pca_final <- principal(dataset2, nfactors = 4, rotate = "varimax")
par(mar = c(2, 10, 2, 2))
# Only loadings of at least 0.4 in absolute value are drawn (cut = 0.4).
fa.diagram(pca_final,
           main = "Struktur Komponen PCA",
           simple = TRUE,
           cex = 0.5,
           rsize = 0.6,
           cut = 0.4)
## Analisis Factor Analysis (FA)

### Pembentukan Model Faktor
# Four-factor principal-axis factor analysis with varimax rotation.
fa_hasil <- fa(
  r = dataset2,
  nfactors = 4,
  fm = "pa",
  rotate = "varimax"
)
# Print the sorted loadings, hiding those below 0.4 in absolute value.
print(fa_hasil[["loadings"]], sort = TRUE, cutoff = 0.4)
##
## Loadings:
## PA2 PA1 PA3 PA4
## Polusi_PM25 0.991
## Kualitas_Udara 0.964
## Penyakit_Napas 0.757
## Hari_Panas 0.989
## Cuaca_Ekstrem 0.952
## Rawat_Inap_Panas 0.659 0.464
## Suhu 0.898
## Risiko_Vektor 0.670
## Anomali_Suhu 0.650
## Kematian_Jantung 0.597
## Curah_Hujan 0.480
## Penyakit_Air
##
## PA2 PA1 PA3 PA4
## SS loadings 2.770 2.412 1.804 0.811
## Proportion Var 0.231 0.201 0.150 0.068
## Cumulative Var 0.231 0.432 0.582 0.650
# Show the variance accounted for by each extracted factor.
print(fa_hasil$Vaccounted)
## PA2 PA1 PA3 PA4
## SS loadings 2.7696675 2.4123741 1.8042632 0.8106155
## Proportion Var 0.2308056 0.2010312 0.1503553 0.0675513
## Cumulative Var 0.2308056 0.4318368 0.5821921 0.6497434
## Proportion Explained 0.3552258 0.3094009 0.2314072 0.1039661
## Cumulative Proportion 0.3552258 0.6646267 0.8960339 1.0000000
# Path diagram of the factor solution; loadings are shown with two decimals
# and only drawn when they reach the 0.4 cut-off.
par(mar = rep(2, 4))
fa.diagram(
  fa.results = fa_hasil,
  main = "Struktur Faktor: Dampak Iklim & Kesehatan 2025",
  cut = 0.4,
  digits = 2,
  simple = FALSE,
  errors = FALSE,
  e.size = 0.05,
  rsize = 0.5,
  cex = 0.8,
  col = c("blue", "red")
)