library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
library(broom)
## Warning: package 'broom' was built under R version 4.4.3
library(dplyr)
library(sf)
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(spdep)
## Warning: package 'spdep' was built under R version 4.4.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.4.3
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(tmap)
## Warning: package 'tmap' was built under R version 4.4.3
library(geodata)
## Warning: package 'geodata' was built under R version 4.4.3
## Loading required package: terra
## Warning: package 'terra' was built under R version 4.4.3
## terra 1.8.60
##
## Attaching package: 'terra'
##
## The following object is masked from 'package:MASS':
##
## area
##
## The following object is masked from 'package:tidyr':
##
## extract
data <- read.csv("DATA UTS EPIDEMIOLOGI.csv", sep = ";", dec =".")
data
## KABKOT Populasi Y X1 X2
## 1 BOGOR 5682300 29110 1899 46.70
## 2 SUKABUMI 2828020 12411 679 32.83
## 3 CIANJUR 2584990 12197 712 38.29
## 4 BANDUNG 3753120 14012 2156 54.16
## 5 GARUT 2716950 9516 876 34.61
## 6 TASIKMALAYA 1920920 4881 710 37.62
## 7 CIAMIS 1259230 3477 789 67.22
## 8 KUNINGAN 1213930 4045 1018 77.55
## 9 CIREBON 2387960 9587 2228 80.39
## 10 MAJALENGKA 1352540 4517 1017 75.36
## 11 SUMEDANG 1187130 3930 758 69.00
## 12 INDRAMAYU 1914040 6092 922 85.78
## 13 SUBANG 1663160 6321 768 83.27
## 14 PURWAKARTA 1050340 4687 1058 66.51
## 15 KARAWANG 2554380 13474 1335 70.98
## 16 BEKASI 3273870 15458 2617 63.80
## 17 BANDUNG BARAT 1884190 4569 1468 38.87
## 18 PANGANDARAN 434100 968 385 61.91
## 19 KOTA BOGOR 1078350 11418 9683 49.53
## 20 KOTA SUKABUMI 365740 3477 7571 36.30
## 21 KOTA BANDUNG 2528160 18627 15176 41.97
## 22 KOTA CIREBON 344850 4122 8744 74.31
## 23 KOTA BEKASI 2644060 13640 12411 65.82
## 24 DEPOK 2163640 8422 10823 49.39
## 25 CIMAHI 598700 4513 14110 54.86
## 26 KOTA TASIKMALAYA 750730 4693 4081 45.01
## 27 BANJAR 209790 1519 1601 83.51
str(data)
## 'data.frame': 27 obs. of 5 variables:
## $ KABKOT : chr "BOGOR" "SUKABUMI" "CIANJUR" "BANDUNG" ...
## $ Populasi: int 5682300 2828020 2584990 3753120 2716950 1920920 1259230 1213930 2387960 1352540 ...
## $ Y : int 29110 12411 12197 14012 9516 4881 3477 4045 9587 4517 ...
## $ X1 : int 1899 679 712 2156 876 710 789 1018 2228 1017 ...
## $ X2 : num 46.7 32.8 38.3 54.2 34.6 ...
# Ubah nama kolom agar mudah dipakai
data <- data %>%
rename(
kabkot = KABKOT,
population = Populasi,
cases = Y,
density = X1,
housing = X2
)
# 2. Hitung Prevalensi TBC per 100.000 penduduk
data <- data %>%
mutate(
prevalence_per100k = (cases / population) * 100000,
housing_prop = housing / 100 # ubah persen jadi proporsi
)
# Lihat hasil
data
## kabkot population cases density housing prevalence_per100k
## 1 BOGOR 5682300 29110 1899 46.70 512.2926
## 2 SUKABUMI 2828020 12411 679 32.83 438.8583
## 3 CIANJUR 2584990 12197 712 38.29 471.8393
## 4 BANDUNG 3753120 14012 2156 54.16 373.3427
## 5 GARUT 2716950 9516 876 34.61 350.2457
## 6 TASIKMALAYA 1920920 4881 710 37.62 254.0970
## 7 CIAMIS 1259230 3477 789 67.22 276.1211
## 8 KUNINGAN 1213930 4045 1018 77.55 333.2153
## 9 CIREBON 2387960 9587 2228 80.39 401.4724
## 10 MAJALENGKA 1352540 4517 1017 75.36 333.9642
## 11 SUMEDANG 1187130 3930 758 69.00 331.0505
## 12 INDRAMAYU 1914040 6092 922 85.78 318.2797
## 13 SUBANG 1663160 6321 768 83.27 380.0596
## 14 PURWAKARTA 1050340 4687 1058 66.51 446.2365
## 15 KARAWANG 2554380 13474 1335 70.98 527.4861
## 16 BEKASI 3273870 15458 2617 63.80 472.1629
## 17 BANDUNG BARAT 1884190 4569 1468 38.87 242.4915
## 18 PANGANDARAN 434100 968 385 61.91 222.9901
## 19 KOTA BOGOR 1078350 11418 9683 49.53 1058.8399
## 20 KOTA SUKABUMI 365740 3477 7571 36.30 950.6753
## 21 KOTA BANDUNG 2528160 18627 15176 41.97 736.7809
## 22 KOTA CIREBON 344850 4122 8744 74.31 1195.3023
## 23 KOTA BEKASI 2644060 13640 12411 65.82 515.8733
## 24 DEPOK 2163640 8422 10823 49.39 389.2514
## 25 CIMAHI 598700 4513 14110 54.86 753.7999
## 26 KOTA TASIKMALAYA 750730 4693 4081 45.01 625.1249
## 27 BANJAR 209790 1519 1601 83.51 724.0574
## housing_prop
## 1 0.4670
## 2 0.3283
## 3 0.3829
## 4 0.5416
## 5 0.3461
## 6 0.3762
## 7 0.6722
## 8 0.7755
## 9 0.8039
## 10 0.7536
## 11 0.6900
## 12 0.8578
## 13 0.8327
## 14 0.6651
## 15 0.7098
## 16 0.6380
## 17 0.3887
## 18 0.6191
## 19 0.4953
## 20 0.3630
## 21 0.4197
## 22 0.7431
## 23 0.6582
## 24 0.4939
## 25 0.5486
## 26 0.4501
## 27 0.8351
data_deskriptif <- data %>%
dplyr::select(cases, population, density, housing, prevalence_per100k) %>%
summarise_all(list(
mean = ~mean(., na.rm = TRUE),
median = ~median(., na.rm = TRUE),
sd = ~sd(., na.rm = TRUE),
min = ~min(., na.rm = TRUE),
max = ~max(., na.rm = TRUE)
))
data_deskriptif
## cases_mean population_mean density_mean housing_mean prevalence_per100k_mean
## 1 8506.778 1864637 3910.926 58.72407 505.0337
## cases_median population_median density_median housing_median
## 1 6092 1884190 1468 61.91
## prevalence_per100k_median cases_sd population_sd density_sd housing_sd
## 1 438.8583 6240.981 1228053 4668.116 17.09696
## prevalence_per100k_sd cases_min population_min density_min housing_min
## 1 251.0959 968 209790 385 32.83
## prevalence_per100k_min cases_max population_max density_max housing_max
## 1 222.9901 29110 5682300 15176 85.78
## prevalence_per100k_max
## 1 1195.302
# --- 1. Ambil peta kabupaten/kota Indonesia dari GADM ---
# (akan otomatis download jika belum ada, lalu disimpan permanen di D:/Data/Peta_GADM)
indo_shp <- geodata::gadm(country = "IDN", level = 2, path = "D:/Maulana Hardy Rayyan/UNPAD/Epidemiologi/UTS Epidemiologi")
# Konversi ke format sf
indo_map <- sf::st_as_sf(indo_shp)
# Filter hanya Provinsi Jawa Barat
jabar <- indo_map %>%
filter(NAME_1 %in% c("West Java", "Jawa Barat")) %>%
mutate(KABKOT = NAME_2)
# Normalisasi huruf besar untuk join data
jabar$KABKOT <- str_to_upper(jabar$KABKOT)
data$KABKOT <- str_to_upper(data$kabkot)
# Gabungkan data CSV dan shapefile
data_spatial <- jabar %>%
left_join(data, by = "KABKOT")
# Cek struktur data hasil gabungan
glimpse(data_spatial)
## Rows: 27
## Columns: 22
## $ GID_2 <chr> "IDN.9.2_1", "IDN.9.1_1", "IDN.9.3_1", "IDN.9.4_1",…
## $ GID_0 <chr> "IDN", "IDN", "IDN", "IDN", "IDN", "IDN", "IDN", "I…
## $ COUNTRY <chr> "Indonesia", "Indonesia", "Indonesia", "Indonesia",…
## $ GID_1 <chr> "IDN.9_1", "IDN.9_1", "IDN.9_1", "IDN.9_1", "IDN.9_…
## $ NAME_1 <chr> "Jawa Barat", "Jawa Barat", "Jawa Barat", "Jawa Bar…
## $ NL_NAME_1 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ NAME_2 <chr> "Bandung", "Bandung Barat", "Banjar", "Bekasi", "Bo…
## $ VARNAME_2 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ NL_NAME_2 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ TYPE_2 <chr> "Kabupaten", "Kabupaten", "Kota", "Kabupaten", "Kab…
## $ ENGTYPE_2 <chr> "Regency", "Regency", "City", "Regency", "Regency",…
## $ CC_2 <chr> "3204", "3217", "3279", "3216", "3201", "3207", "32…
## $ HASC_2 <chr> "ID.JR.BU", "ID.JR.BB", "ID.JR.BA", "ID.JR.BS", "ID…
## $ KABKOT <chr> "BANDUNG", "BANDUNG BARAT", "BANJAR", "BEKASI", "BO…
## $ kabkot <chr> "BANDUNG", "BANDUNG BARAT", "BANJAR", "BEKASI", "BO…
## $ population <int> 3753120, 1884190, 209790, 3273870, 5682300, 1259230…
## $ cases <int> 14012, 4569, 1519, 15458, 29110, 3477, 12197, 4513,…
## $ density <int> 2156, 1468, 1601, 2617, 1899, 789, 712, 14110, 2228…
## $ housing <dbl> 54.16, 38.87, 83.51, 63.80, 46.70, 67.22, 38.29, 54…
## $ prevalence_per100k <dbl> 373.3427, 242.4915, 724.0574, 472.1629, 512.2926, 2…
## $ housing_prop <dbl> 0.5416, 0.3887, 0.8351, 0.6380, 0.4670, 0.6722, 0.3…
## $ geometry <GEOMETRY [°]> POLYGON ((107.6257 -7.29998..., POLYGON ((…
data_spatial <- data_spatial %>%
filter(!is.na(cases), !is.na(population), !is.na(density), !is.na(housing))
tmap_mode("plot")
## ℹ tmap modes "plot" - "view"
## ℹ toggle with `tmap::ttm()`
tm_shape(data_spatial) +
tm_polygons(
col = "cases", # pakai kolom 'cases' bukan 'Y'
palette = "Reds", # warna gradasi merah
title = "Jumlah Kasus TBC"
) +
tm_borders(lwd = 0.7, col = "gray40") +
tm_layout(
title = "Sebaran Kasus TBC di Kabupaten/Kota Jawa Barat (2024)",
legend.outside = TRUE,
frame = FALSE
)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
## (instead of 'col'), and 'col' for the outlines (instead of 'border.col').[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(title = )`[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.
# --- 5. Hitung Moran’s I Global ---
# Konversi sf → Spatial
map_sp <- as_Spatial(data_spatial)
# Buat matriks tetangga (queen contiguity)
nb <- poly2nb(map_sp)
# Buat bobot spasial (row-standardized)
lw <- nb2listw(nb, style = "W")
# Uji Moran's I global
moran_global <- moran.test(data_spatial$cases, lw)
moran_global
##
## Moran I test under randomisation
##
## data: data_spatial$cases
## weights: lw
##
## Moran I statistic standard deviate = 2.9347, p-value = 0.00167
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.35148462 -0.04000000 0.01779563
# --- 8. Hitung Local Moran’s I (LISA) ---
local_moran <- localmoran(st_drop_geometry(data_spatial)$cases, lw)
head(local_moran)
## Ii E.Ii Var.Ii Z.Ii Pr(z != E(Ii))
## 1 -0.03867622 -0.02966099 0.08017623 -0.03183863 0.974600737
## 2 -0.14554706 -0.01949157 0.06556356 -0.49230075 0.622506751
## 3 1.05549883 -0.05775959 1.41500884 0.93587190 0.349339144
## 4 1.80599497 -0.04838894 0.36582139 3.06595023 0.002169794
## 5 1.47678414 -0.44997849 0.56976067 2.55259539 0.010692363
## 6 0.70568331 -0.03086110 0.12960434 2.04592245 0.040764001
# --- 9. Klasifikasi LISA (HH, LL, HL, LH) ---
# 1️⃣ Ambil nilai kasus dan rata-ratanya
cases <- st_drop_geometry(data_spatial)$cases
mean_val <- mean(cases, na.rm = TRUE)
# 2️⃣ Hitung nilai lag spasial (rata-rata tetangga)
lag_cases <- lag.listw(lw, cases)
# 3️⃣ Buat klasifikasi berdasarkan kuadran LISA
lisa_cluster <- ifelse(cases > mean_val & lag_cases > mean(lag_cases), "High-High",
ifelse(cases < mean_val & lag_cases < mean(lag_cases), "Low-Low",
ifelse(cases > mean_val & lag_cases < mean(lag_cases), "High-Low",
ifelse(cases < mean_val & lag_cases > mean(lag_cases), "Low-High", "Undefined"))))
# 4️⃣ Tambahkan ke data spasial
data_spatial$lisa_cluster <- lisa_cluster
# 5️⃣ Cek distribusi klaster
table(data_spatial$lisa_cluster)
##
## High-High High-Low Low-High Low-Low
## 7 4 4 11
# 6️⃣ Visualisasikan peta LISA
tm_shape(data_spatial) +
tm_polygons(
col = "lisa_cluster",
palette = c("red", "blue", "pink", "lightblue", "gray"),
title = "Klaster LISA (Local Moran’s I)"
) +
tm_borders() +
tm_layout(
legend.outside = TRUE,
frame = FALSE
)
##
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'
# --- 1. Pastikan library corrplot sudah di-load ---
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
# --- 2. Pilih variabel numerik ---
data_num <- data %>%
dplyr::select(cases, density, housing)
# --- 3. Hitung matriks korelasi ---
cor_matrix <- cor(data_num, method = "pearson", use = "complete.obs")
# --- 4. Plot hasil korelasi ---
corrplot(
cor_matrix,
method = "color", # bisa juga "circle", "number", dsb.
type = "upper", # hanya tampilkan segitiga atas
order = "hclust", # urutkan berdasarkan clustering
addCoef.col = "black", # tampilkan nilai korelasi
tl.col = "black", # warna label
tl.srt = 45, # rotasi label
col = colorRampPalette(c("blue", "white", "red"))(200) # gradasi warna
)
# --- 2. Buat model regresi "dummy" ---
# Model ini hanya untuk menghitung VIF antar variabel bebas (density dan housing)
model_vif <- lm(cases ~ density + housing, data = data)
# --- 3. Hitung nilai VIF ---
vif_values <- car::vif(model_vif)
# --- 4. Tampilkan hasil ---
vif_values
## density housing
## 1.040387 1.040387
# 3. Model Cross-Sectional → Prevalence Rate Ratio (PRR) via Poisson Regression
model_pois <- glm(
cases ~ density + housing_prop + offset(log(population)),
family = poisson(link = "log"),
data = data
)
summary(model_pois)
##
## Call:
## glm(formula = cases ~ density + housing_prop + offset(log(population)),
## family = poisson(link = "log"), data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.482e+00 8.087e-03 -677.955 <2e-16 ***
## density 4.172e-05 4.125e-07 101.141 <2e-16 ***
## housing_prop -1.339e-01 1.339e-02 -9.999 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 27033 on 26 degrees of freedom
## Residual deviance: 17191 on 24 degrees of freedom
## AIC: 17484
##
## Number of Fisher Scoring iterations: 4
# 4. Konversi koefisien ke PRR (Prevalence Rate Ratio)
PRR <- tidy(model_pois, exponentiate = TRUE, conf.int = TRUE)
PRR
## # A tibble: 3 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.00416 0.00809 -678. 0 0.00409 0.00423
## 2 density 1.00 0.000000413 101. 0 1.00 1.00
## 3 housing_prop 0.875 0.0134 -10.0 1.54e-23 0.852 0.898
# 5. Cek Overdispersion
disp <- model_pois$deviance / model_pois$df.residual
disp
## [1] 716.3092
# Jika overdispersi → gunakan Negative Binomial
if (disp > 1) {
model_nb <- glm.nb(
cases ~ density + housing_prop + offset(log(population)),
data = data
)
summary(model_nb)
}
##
## Call:
## glm.nb(formula = cases ~ density + housing_prop + offset(log(population)),
## data = data, init.theta = 8.967353605, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.686e+00 2.551e-01 -22.289 <2e-16 ***
## density 6.687e-05 1.432e-05 4.671 3e-06 ***
## housing_prop 1.558e-01 3.911e-01 0.398 0.69
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(8.9674) family taken to be 1)
##
## Null deviance: 48.868 on 26 degrees of freedom
## Residual deviance: 27.527 on 24 degrees of freedom
## AIC: 501.31
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 8.97
## Std. Err.: 2.40
##
## 2 x log-likelihood: -493.31
disp <- model_nb$deviance / model_nb$df.residual
disp
## [1] 1.146979
PR <- tidy(model_nb, exponentiate = TRUE, conf.int = TRUE)
PR
## # A tibble: 3 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.00339 0.255 -22.3 4.72e-110 0.00210 0.00560
## 2 density 1.00 0.0000143 4.67 3.00e- 6 1.00 1.00
## 3 housing_prop 1.17 0.391 0.398 6.90e- 1 0.553 2.46
Interpretasi :
kepadatan penduduk berhubungan positif dan signifikan dengan prevalensi TBC di kabupaten/kota di Jawa Barat (PR = 1.000067; p < 0.001). Hal ini menunjukkan bahwa setiap peningkatan 1 jiwa/km² kepadatan penduduk berkaitan dengan peningkatan prevalensi TBC sebesar 0.0067%.
variabel persentase rumah layak huni tidak menunjukkan hubungan yang signifikan dengan prevalensi TBC (PR = 1.17; p = 0.69).
# 6. Tampilkan tabel prevalensi tertinggi
dplyr::select(data, kabkot, prevalence_per100k) %>%
arrange(desc(prevalence_per100k)) %>%
head(10)
## kabkot prevalence_per100k
## 1 KOTA CIREBON 1195.3023
## 2 KOTA BOGOR 1058.8399
## 3 KOTA SUKABUMI 950.6753
## 4 CIMAHI 753.7999
## 5 KOTA BANDUNG 736.7809
## 6 BANJAR 724.0574
## 7 KOTA TASIKMALAYA 625.1249
## 8 KARAWANG 527.4861
## 9 KOTA BEKASI 515.8733
## 10 BOGOR 512.2926