Import Library

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.2
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(MASS)
## Warning: package 'MASS' was built under R version 4.4.3
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(broom)
## Warning: package 'broom' was built under R version 4.4.3
library(dplyr)
library(sf)
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(spdep)
## Warning: package 'spdep' was built under R version 4.4.3
## Loading required package: spData
## Warning: package 'spData' was built under R version 4.4.3
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(tmap)
## Warning: package 'tmap' was built under R version 4.4.3
library(geodata)
## Warning: package 'geodata' was built under R version 4.4.3
## Loading required package: terra
## Warning: package 'terra' was built under R version 4.4.3
## terra 1.8.60
## 
## Attaching package: 'terra'
## 
## The following object is masked from 'package:MASS':
## 
##     area
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

Data

data <- read.csv("DATA UTS EPIDEMIOLOGI.csv", sep = ";", dec =".")
data
##              KABKOT Populasi     Y    X1    X2
## 1             BOGOR  5682300 29110  1899 46.70
## 2          SUKABUMI  2828020 12411   679 32.83
## 3           CIANJUR  2584990 12197   712 38.29
## 4           BANDUNG  3753120 14012  2156 54.16
## 5             GARUT  2716950  9516   876 34.61
## 6       TASIKMALAYA  1920920  4881   710 37.62
## 7            CIAMIS  1259230  3477   789 67.22
## 8          KUNINGAN  1213930  4045  1018 77.55
## 9           CIREBON  2387960  9587  2228 80.39
## 10       MAJALENGKA  1352540  4517  1017 75.36
## 11         SUMEDANG  1187130  3930   758 69.00
## 12        INDRAMAYU  1914040  6092   922 85.78
## 13           SUBANG  1663160  6321   768 83.27
## 14       PURWAKARTA  1050340  4687  1058 66.51
## 15         KARAWANG  2554380 13474  1335 70.98
## 16           BEKASI  3273870 15458  2617 63.80
## 17    BANDUNG BARAT  1884190  4569  1468 38.87
## 18      PANGANDARAN   434100   968   385 61.91
## 19       KOTA BOGOR  1078350 11418  9683 49.53
## 20    KOTA SUKABUMI   365740  3477  7571 36.30
## 21     KOTA BANDUNG  2528160 18627 15176 41.97
## 22     KOTA CIREBON   344850  4122  8744 74.31
## 23      KOTA BEKASI  2644060 13640 12411 65.82
## 24            DEPOK  2163640  8422 10823 49.39
## 25           CIMAHI   598700  4513 14110 54.86
## 26 KOTA TASIKMALAYA   750730  4693  4081 45.01
## 27           BANJAR   209790  1519  1601 83.51
str(data)
## 'data.frame':    27 obs. of  5 variables:
##  $ KABKOT  : chr  "BOGOR" "SUKABUMI" "CIANJUR" "BANDUNG" ...
##  $ Populasi: int  5682300 2828020 2584990 3753120 2716950 1920920 1259230 1213930 2387960 1352540 ...
##  $ Y       : int  29110 12411 12197 14012 9516 4881 3477 4045 9587 4517 ...
##  $ X1      : int  1899 679 712 2156 876 710 789 1018 2228 1017 ...
##  $ X2      : num  46.7 32.8 38.3 54.2 34.6 ...
# Ubah nama kolom agar mudah dipakai
data <- data %>%
  rename(
    kabkot = KABKOT,
    population = Populasi,
    cases = Y,
    density = X1,
    housing = X2
  )

Prevalensi

# 2. Hitung Prevalensi TBC per 100.000 penduduk
data <- data %>%
  mutate(
    prevalence_per100k = (cases / population) * 100000,
    housing_prop = housing / 100   # ubah persen jadi proporsi
  )

# Lihat hasil
data
##              kabkot population cases density housing prevalence_per100k
## 1             BOGOR    5682300 29110    1899   46.70           512.2926
## 2          SUKABUMI    2828020 12411     679   32.83           438.8583
## 3           CIANJUR    2584990 12197     712   38.29           471.8393
## 4           BANDUNG    3753120 14012    2156   54.16           373.3427
## 5             GARUT    2716950  9516     876   34.61           350.2457
## 6       TASIKMALAYA    1920920  4881     710   37.62           254.0970
## 7            CIAMIS    1259230  3477     789   67.22           276.1211
## 8          KUNINGAN    1213930  4045    1018   77.55           333.2153
## 9           CIREBON    2387960  9587    2228   80.39           401.4724
## 10       MAJALENGKA    1352540  4517    1017   75.36           333.9642
## 11         SUMEDANG    1187130  3930     758   69.00           331.0505
## 12        INDRAMAYU    1914040  6092     922   85.78           318.2797
## 13           SUBANG    1663160  6321     768   83.27           380.0596
## 14       PURWAKARTA    1050340  4687    1058   66.51           446.2365
## 15         KARAWANG    2554380 13474    1335   70.98           527.4861
## 16           BEKASI    3273870 15458    2617   63.80           472.1629
## 17    BANDUNG BARAT    1884190  4569    1468   38.87           242.4915
## 18      PANGANDARAN     434100   968     385   61.91           222.9901
## 19       KOTA BOGOR    1078350 11418    9683   49.53          1058.8399
## 20    KOTA SUKABUMI     365740  3477    7571   36.30           950.6753
## 21     KOTA BANDUNG    2528160 18627   15176   41.97           736.7809
## 22     KOTA CIREBON     344850  4122    8744   74.31          1195.3023
## 23      KOTA BEKASI    2644060 13640   12411   65.82           515.8733
## 24            DEPOK    2163640  8422   10823   49.39           389.2514
## 25           CIMAHI     598700  4513   14110   54.86           753.7999
## 26 KOTA TASIKMALAYA     750730  4693    4081   45.01           625.1249
## 27           BANJAR     209790  1519    1601   83.51           724.0574
##    housing_prop
## 1        0.4670
## 2        0.3283
## 3        0.3829
## 4        0.5416
## 5        0.3461
## 6        0.3762
## 7        0.6722
## 8        0.7755
## 9        0.8039
## 10       0.7536
## 11       0.6900
## 12       0.8578
## 13       0.8327
## 14       0.6651
## 15       0.7098
## 16       0.6380
## 17       0.3887
## 18       0.6191
## 19       0.4953
## 20       0.3630
## 21       0.4197
## 22       0.7431
## 23       0.6582
## 24       0.4939
## 25       0.5486
## 26       0.4501
## 27       0.8351

Analisis Deskriptif

data_deskriptif <- data %>%
  dplyr::select(cases, population, density, housing, prevalence_per100k) %>%
  summarise_all(list(
    mean = ~mean(., na.rm = TRUE),
    median = ~median(., na.rm = TRUE),
    sd = ~sd(., na.rm = TRUE),
    min = ~min(., na.rm = TRUE),
    max = ~max(., na.rm = TRUE)
  ))

data_deskriptif
##   cases_mean population_mean density_mean housing_mean prevalence_per100k_mean
## 1   8506.778         1864637     3910.926     58.72407                505.0337
##   cases_median population_median density_median housing_median
## 1         6092           1884190           1468          61.91
##   prevalence_per100k_median cases_sd population_sd density_sd housing_sd
## 1                  438.8583 6240.981       1228053   4668.116   17.09696
##   prevalence_per100k_sd cases_min population_min density_min housing_min
## 1              251.0959       968         209790         385       32.83
##   prevalence_per100k_min cases_max population_max density_max housing_max
## 1               222.9901     29110        5682300       15176       85.78
##   prevalence_per100k_max
## 1               1195.302

Peta Persebaran Kasus TBC

# --- 1. Ambil peta kabupaten/kota Indonesia dari GADM ---
# (akan otomatis download jika belum ada, lalu disimpan permanen di D:/Data/Peta_GADM)
indo_shp <- geodata::gadm(country = "IDN", level = 2, path = "D:/Maulana Hardy Rayyan/UNPAD/Epidemiologi/UTS Epidemiologi")

# Konversi ke format sf
indo_map <- sf::st_as_sf(indo_shp)
# Filter hanya Provinsi Jawa Barat

jabar <- indo_map %>%
filter(NAME_1 %in% c("West Java", "Jawa Barat")) %>%
mutate(KABKOT = NAME_2)

# Normalisasi huruf besar untuk join data

jabar$KABKOT <- str_to_upper(jabar$KABKOT)
data$KABKOT <- str_to_upper(data$kabkot)

# Gabungkan data CSV dan shapefile
data_spatial <- jabar %>%
left_join(data, by = "KABKOT")

# Cek struktur data hasil gabungan
glimpse(data_spatial)
## Rows: 27
## Columns: 22
## $ GID_2              <chr> "IDN.9.2_1", "IDN.9.1_1", "IDN.9.3_1", "IDN.9.4_1",…
## $ GID_0              <chr> "IDN", "IDN", "IDN", "IDN", "IDN", "IDN", "IDN", "I…
## $ COUNTRY            <chr> "Indonesia", "Indonesia", "Indonesia", "Indonesia",…
## $ GID_1              <chr> "IDN.9_1", "IDN.9_1", "IDN.9_1", "IDN.9_1", "IDN.9_…
## $ NAME_1             <chr> "Jawa Barat", "Jawa Barat", "Jawa Barat", "Jawa Bar…
## $ NL_NAME_1          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ NAME_2             <chr> "Bandung", "Bandung Barat", "Banjar", "Bekasi", "Bo…
## $ VARNAME_2          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ NL_NAME_2          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ TYPE_2             <chr> "Kabupaten", "Kabupaten", "Kota", "Kabupaten", "Kab…
## $ ENGTYPE_2          <chr> "Regency", "Regency", "City", "Regency", "Regency",…
## $ CC_2               <chr> "3204", "3217", "3279", "3216", "3201", "3207", "32…
## $ HASC_2             <chr> "ID.JR.BU", "ID.JR.BB", "ID.JR.BA", "ID.JR.BS", "ID…
## $ KABKOT             <chr> "BANDUNG", "BANDUNG BARAT", "BANJAR", "BEKASI", "BO…
## $ kabkot             <chr> "BANDUNG", "BANDUNG BARAT", "BANJAR", "BEKASI", "BO…
## $ population         <int> 3753120, 1884190, 209790, 3273870, 5682300, 1259230…
## $ cases              <int> 14012, 4569, 1519, 15458, 29110, 3477, 12197, 4513,…
## $ density            <int> 2156, 1468, 1601, 2617, 1899, 789, 712, 14110, 2228…
## $ housing            <dbl> 54.16, 38.87, 83.51, 63.80, 46.70, 67.22, 38.29, 54…
## $ prevalence_per100k <dbl> 373.3427, 242.4915, 724.0574, 472.1629, 512.2926, 2…
## $ housing_prop       <dbl> 0.5416, 0.3887, 0.8351, 0.6380, 0.4670, 0.6722, 0.3…
## $ geometry           <GEOMETRY [°]> POLYGON ((107.6257 -7.29998..., POLYGON ((…
data_spatial <- data_spatial %>%
  filter(!is.na(cases), !is.na(population), !is.na(density), !is.na(housing))
tmap_mode("plot")
## ℹ tmap modes "plot" - "view"
## ℹ toggle with `tmap::ttm()`
tm_shape(data_spatial) +
  tm_polygons(
    col = "cases",           # pakai kolom 'cases' bukan 'Y'
    palette = "Reds",        # warna gradasi merah
    title = "Jumlah Kasus TBC"
  ) +
  tm_borders(lwd = 0.7, col = "gray40") +
  tm_layout(
    title = "Sebaran Kasus TBC di Kabupaten/Kota Jawa Barat (2024)",
    legend.outside = TRUE,
    frame = FALSE
  )
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).[v3->v4] `tm_polygons()`: use 'fill' for the fill color of polygons/symbols
## (instead of 'col'), and 'col' for the outlines (instead of 'border.col').[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'[v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(title = )`[cols4all] color palettes: use palettes from the R package cols4all. Run
## `cols4all::c4a_gui()` to explore them. The old palette name "Reds" is named
## "brewer.reds"Multiple palettes called "reds" found: "brewer.reds", "matplotlib.reds". The first one, "brewer.reds", is returned.
## [plot mode] fit legend/component: Some legend items or map compoments do not
## fit well, and are therefore rescaled.
## ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

Uji Autokorelasi Spasial

# --- 5. Hitung Moran’s I Global ---
# Konversi sf → Spatial
map_sp <- as_Spatial(data_spatial)

# Buat matriks tetangga (queen contiguity)
nb <- poly2nb(map_sp)

# Buat bobot spasial (row-standardized)
lw <- nb2listw(nb, style = "W")

# Uji Moran's I global
moran_global <- moran.test(data_spatial$cases, lw)
moran_global
## 
##  Moran I test under randomisation
## 
## data:  data_spatial$cases  
## weights: lw    
## 
## Moran I statistic standard deviate = 2.9347, p-value = 0.00167
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##        0.35148462       -0.04000000        0.01779563
# --- 8. Hitung Local Moran’s I (LISA) ---
local_moran <- localmoran(st_drop_geometry(data_spatial)$cases, lw)
head(local_moran)
##            Ii        E.Ii     Var.Ii        Z.Ii Pr(z != E(Ii))
## 1 -0.03867622 -0.02966099 0.08017623 -0.03183863    0.974600737
## 2 -0.14554706 -0.01949157 0.06556356 -0.49230075    0.622506751
## 3  1.05549883 -0.05775959 1.41500884  0.93587190    0.349339144
## 4  1.80599497 -0.04838894 0.36582139  3.06595023    0.002169794
## 5  1.47678414 -0.44997849 0.56976067  2.55259539    0.010692363
## 6  0.70568331 -0.03086110 0.12960434  2.04592245    0.040764001
# --- 9. Klasifikasi LISA (HH, LL, HL, LH) ---

# 1️⃣ Ambil nilai kasus dan rata-ratanya
cases <- st_drop_geometry(data_spatial)$cases
mean_val <- mean(cases, na.rm = TRUE)

# 2️⃣ Hitung nilai lag spasial (rata-rata tetangga)
lag_cases <- lag.listw(lw, cases)

# 3️⃣ Buat klasifikasi berdasarkan kuadran LISA
lisa_cluster <- ifelse(cases > mean_val & lag_cases > mean(lag_cases), "High-High",
                ifelse(cases < mean_val & lag_cases < mean(lag_cases), "Low-Low",
                ifelse(cases > mean_val & lag_cases < mean(lag_cases), "High-Low",
                ifelse(cases < mean_val & lag_cases > mean(lag_cases), "Low-High", "Undefined"))))

# 4️⃣ Tambahkan ke data spasial
data_spatial$lisa_cluster <- lisa_cluster

# 5️⃣ Cek distribusi klaster
table(data_spatial$lisa_cluster)
## 
## High-High  High-Low  Low-High   Low-Low 
##         7         4         4        11
# 6️⃣ Visualisasikan peta LISA
tm_shape(data_spatial) +
  tm_polygons(
    col = "lisa_cluster",
    palette = c("red", "blue", "pink", "lightblue", "gray"),
    title = "Klaster LISA (Local Moran’s I)"
  ) +
  tm_borders() +
  tm_layout(
    legend.outside = TRUE,
    frame = FALSE
  )
## 
## ── tmap v3 code detected ───────────────────────────────────────────────────────
## [v3->v4] `tm_tm_polygons()`: migrate the argument(s) related to the scale of
## the visual variable `fill` namely 'palette' (rename to 'values') to fill.scale
## = tm_scale(<HERE>).
## [v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
## visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'

Korelasi Antar Variabel

# --- 1. Pastikan library corrplot sudah di-load ---
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
# --- 2. Pilih variabel numerik ---
data_num <- data %>% 
  dplyr::select(cases, density, housing)

# --- 3. Hitung matriks korelasi ---
cor_matrix <- cor(data_num, method = "pearson", use = "complete.obs")

# --- 4. Plot hasil korelasi ---
corrplot(
  cor_matrix, 
  method = "color",       # bisa juga "circle", "number", dsb.
  type = "upper",         # hanya tampilkan segitiga atas
  order = "hclust",       # urutkan berdasarkan clustering
  addCoef.col = "black",  # tampilkan nilai korelasi
  tl.col = "black",       # warna label
  tl.srt = 45,            # rotasi label
  col = colorRampPalette(c("blue", "white", "red"))(200)  # gradasi warna
)

Multikolinearitas

# --- 2. Buat model regresi "dummy" ---
# Model ini hanya untuk menghitung VIF antar variabel bebas (density dan housing)
model_vif <- lm(cases ~ density + housing, data = data)

# --- 3. Hitung nilai VIF ---
vif_values <- car::vif(model_vif)

# --- 4. Tampilkan hasil ---
vif_values
##  density  housing 
## 1.040387 1.040387

Regresi Poisson

# 3. Model Cross-Sectional → Prevalence Rate Ratio (PRR) via Poisson Regression
model_pois <- glm(
  cases ~ density + housing_prop + offset(log(population)),
  family = poisson(link = "log"),
  data = data
)

summary(model_pois)
## 
## Call:
## glm(formula = cases ~ density + housing_prop + offset(log(population)), 
##     family = poisson(link = "log"), data = data)
## 
## Coefficients:
##                Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)  -5.482e+00  8.087e-03 -677.955   <2e-16 ***
## density       4.172e-05  4.125e-07  101.141   <2e-16 ***
## housing_prop -1.339e-01  1.339e-02   -9.999   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 27033  on 26  degrees of freedom
## Residual deviance: 17191  on 24  degrees of freedom
## AIC: 17484
## 
## Number of Fisher Scoring iterations: 4

Prevalence Rate Ratio

# 4. Konversi koefisien ke PRR (Prevalence Rate Ratio)
PRR <- tidy(model_pois, exponentiate = TRUE, conf.int = TRUE)
PRR
## # A tibble: 3 × 7
##   term         estimate   std.error statistic  p.value conf.low conf.high
##   <chr>           <dbl>       <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
## 1 (Intercept)   0.00416 0.00809        -678.  0         0.00409   0.00423
## 2 density       1.00    0.000000413     101.  0         1.00      1.00   
## 3 housing_prop  0.875   0.0134          -10.0 1.54e-23  0.852     0.898

Overdispersion

# 5. Cek Overdispersion
disp <- model_pois$deviance / model_pois$df.residual
disp
## [1] 716.3092

Jadinya Pake Binomial Negatif

# Jika overdispersi → gunakan Negative Binomial
if (disp > 1) {
  model_nb <- glm.nb(
    cases ~ density + housing_prop + offset(log(population)),
    data = data
  )
  
  summary(model_nb)
}
## 
## Call:
## glm.nb(formula = cases ~ density + housing_prop + offset(log(population)), 
##     data = data, init.theta = 8.967353605, link = log)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -5.686e+00  2.551e-01 -22.289   <2e-16 ***
## density       6.687e-05  1.432e-05   4.671    3e-06 ***
## housing_prop  1.558e-01  3.911e-01   0.398     0.69    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(8.9674) family taken to be 1)
## 
##     Null deviance: 48.868  on 26  degrees of freedom
## Residual deviance: 27.527  on 24  degrees of freedom
## AIC: 501.31
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  8.97 
##           Std. Err.:  2.40 
## 
##  2 x log-likelihood:  -493.31
disp <- model_nb$deviance / model_nb$df.residual
disp
## [1] 1.146979

PR Binomial Negatif

PR <- tidy(model_nb, exponentiate = TRUE, conf.int = TRUE)
PR
## # A tibble: 3 × 7
##   term         estimate std.error statistic   p.value conf.low conf.high
##   <chr>           <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)   0.00339 0.255       -22.3   4.72e-110  0.00210   0.00560
## 2 density       1.00    0.0000143     4.67  3.00e-  6  1.00      1.00   
## 3 housing_prop  1.17    0.391         0.398 6.90e-  1  0.553     2.46

Interpretasi :

# 6. Tampilkan tabel prevalensi tertinggi
dplyr::select(data, kabkot, prevalence_per100k) %>%
  arrange(desc(prevalence_per100k)) %>%
  head(10)
##              kabkot prevalence_per100k
## 1      KOTA CIREBON          1195.3023
## 2        KOTA BOGOR          1058.8399
## 3     KOTA SUKABUMI           950.6753
## 4            CIMAHI           753.7999
## 5      KOTA BANDUNG           736.7809
## 6            BANJAR           724.0574
## 7  KOTA TASIKMALAYA           625.1249
## 8          KARAWANG           527.4861
## 9       KOTA BEKASI           515.8733
## 10            BOGOR           512.2926