library(readxl) 
## Warning: package 'readxl' was built under R version 4.4.3
library(janitor)
## Warning: package 'janitor' was built under R version 4.4.3
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(ResourceSelection)
## Warning: package 'ResourceSelection' was built under R version 4.4.3
## ResourceSelection 0.3-6   2023-06-27
library(pROC)
## Warning: package 'pROC' was built under R version 4.4.3
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
library(margins)
## Warning: package 'margins' was built under R version 4.4.3
library(sf)
## Warning: package 'sf' was built under R version 4.4.3
## Linking to GEOS 3.13.0, GDAL 3.10.1, PROJ 9.5.1; sf_use_s2() is TRUE
library(rnaturalearth)
## Warning: package 'rnaturalearth' was built under R version 4.4.3
library(rnaturalearthdata)
## Warning: package 'rnaturalearthdata' was built under R version 4.4.3
## 
## Attaching package: 'rnaturalearthdata'
## The following object is masked from 'package:rnaturalearth':
## 
##     countries110
library(geodata)
## Warning: package 'geodata' was built under R version 4.4.3
## Loading required package: terra
## Warning: package 'terra' was built under R version 4.4.3
## terra 1.8.70
## 
## Attaching package: 'terra'
## The following object is masked from 'package:janitor':
## 
##     crosstab

Input Data

library(readxl)
Dataset_Stunting <- read_excel("C:/Users/Lenovo/Downloads/Dataset Stunting.xlsx")
View(Dataset_Stunting)
data_raw <- Dataset_Stunting
data <- clean_names(data_raw)
head(data)
## # A tibble: 6 × 9
##   provinsi     stunting_0_1 status_stunting kedalaman_kemiskinan…¹ rls_perempuan
##   <chr>               <dbl>           <dbl>                  <dbl>         <dbl>
## 1 ACEH                    1            28.6                   1.84          9.49
## 2 SUMATERA UT…            1            22                     1.13          9.77
## 3 SUMATERA BA…            1            24.9                   0.74          9.53
## 4 RIAU                    1            20.1                   0.87          9.32
## 5 JAMBI                   0            17.1                   0.98          8.6 
## 6 SUMATERA SE…            0            15.9                   1.64          8.35
## # ℹ abbreviated name: ¹​kedalaman_kemiskinan_percent
## # ℹ 4 more variables: sanitasi_layak_percent <dbl>,
## #   air_minum_layak_percent <dbl>, imunisasi_lengkap_percent <dbl>,
## #   asi_eksklusif_percent <dbl>
str(data)
## tibble [38 × 9] (S3: tbl_df/tbl/data.frame)
##  $ provinsi                    : chr [1:38] "ACEH" "SUMATERA UTARA" "SUMATERA BARAT" "RIAU" ...
##  $ stunting_0_1                : num [1:38] 1 1 1 1 0 0 0 0 1 0 ...
##  $ status_stunting             : num [1:38] 28.6 22 24.9 20.1 17.1 15.9 18.8 15.9 20.1 15 ...
##  $ kedalaman_kemiskinan_percent: num [1:38] 1.84 1.13 0.74 0.87 0.98 1.64 1.83 1.54 0.65 0.69 ...
##  $ rls_perempuan               : num [1:38] 9.49 9.77 9.53 9.32 8.6 ...
##  $ sanitasi_layak_percent      : num [1:38] 81.1 85.7 72.8 86.3 84 ...
##  $ air_minum_layak_percent     : num [1:38] 90.1 92.9 86.8 92.2 82.2 ...
##  $ imunisasi_lengkap_percent   : num [1:38] 25.9 41.1 39.8 45.6 53.7 ...
##  $ asi_eksklusif_percent       : num [1:38] 67.8 66.4 76.4 71.5 74.3 ...
summary(data)
##    provinsi          stunting_0_1    status_stunting
##  Length:38          Min.   :0.0000   Min.   : 8.70  
##  Class :character   1st Qu.:0.0000   1st Qu.:17.45  
##  Mode  :character   Median :1.0000   Median :22.55  
##                     Mean   :0.6842   Mean   :23.01  
##                     3rd Qu.:1.0000   3rd Qu.:26.10  
##                     Max.   :1.0000   Max.   :40.00  
##  kedalaman_kemiskinan_percent rls_perempuan    sanitasi_layak_percent
##  Min.   :0.4700               Min.   : 3.400   Min.   :12.61         
##  1st Qu.:0.8625               1st Qu.: 8.057   1st Qu.:79.38         
##  Median :1.4750               Median : 8.720   Median :83.36         
##  Mean   :1.8792               Mean   : 8.608   Mean   :81.14         
##  3rd Qu.:1.9425               3rd Qu.: 9.447   3rd Qu.:87.18         
##  Max.   :5.6200               Max.   :11.190   Max.   :96.83         
##  air_minum_layak_percent imunisasi_lengkap_percent asi_eksklusif_percent
##  Min.   :30.64           Min.   :18.41             Min.   :44.64        
##  1st Qu.:82.22           1st Qu.:53.54             1st Qu.:65.78        
##  Median :89.02           Median :62.13             Median :72.42        
##  Mean   :87.00           Mean   :60.41             Mean   :70.57        
##  3rd Qu.:94.65           3rd Qu.:71.45             3rd Qu.:76.44        
##  Max.   :99.96           Max.   :85.58             Max.   :83.07
names(data)
## [1] "provinsi"                     "stunting_0_1"                
## [3] "status_stunting"              "kedalaman_kemiskinan_percent"
## [5] "rls_perempuan"                "sanitasi_layak_percent"      
## [7] "air_minum_layak_percent"      "imunisasi_lengkap_percent"   
## [9] "asi_eksklusif_percent"
table(data$stunting_0_1)
## 
##  0  1 
## 12 26
if ("yp_hat" %in% names(data))  data$yp_hat <- NULL
if ("pred_label" %in% names(data)) data$pred_label <- NULL
table(data$stunting_0_1)
## 
##  0  1 
## 12 26
prop.table(table(data$stunting_0_1))
## 
##         0         1 
## 0.3157895 0.6842105

Peta Persentase stunting 2024

indonesia_map <- gadm(country = "IDN", level = 1, path = tempdir())
indonesia_map <- st_as_sf(indonesia_map)
data$provinsi
##  [1] "ACEH"                 "SUMATERA UTARA"       "SUMATERA BARAT"      
##  [4] "RIAU"                 "JAMBI"                "SUMATERA SELATAN"    
##  [7] "BENGKULU"             "LAMPUNG"              "KEP, BANGKA BELITUNG"
## [10] "KEP, RIAU"            "DKI JAKARTA"          "JAWA BARAT"          
## [13] "JAWA TENGAH"          "DI YOGYAKARTA"        "JAWA TIMUR"          
## [16] "BANTEN"               "BALI"                 "NUSA TENGGARA BARAT" 
## [19] "NUSA TENGGARA TIMUR"  "KALIMANTAN BARAT"     "KALIMANTAN TENGAH"   
## [22] "KALIMANTAN SELATAN"   "KALIMANTAN TIMUR"     "KALIMANTAN UTARA"    
## [25] "SULAWESI UTARA"       "SULAWESI TENGAH"      "SULAWESI SELATAN"    
## [28] "SULAWESI TENGGARA"    "GORONTALO"            "SULAWESI BARAT"      
## [31] "MALUKU"               "MALUKU UTARA"         "PAPUA BARAT"         
## [34] "PAPUA BARAT DAYA"     "PAPUA"                "PAPUA SELATAN"       
## [37] "PAPUA TENGAH"         "PAPUA PEGUNUNGAN"
unique(indonesia_map$NAME_1)
##  [1] "Aceh"                "Bali"                "Bangka Belitung"    
##  [4] "Banten"              "Bengkulu"            "Gorontalo"          
##  [7] "Jakarta Raya"        "Jambi"               "Jawa Barat"         
## [10] "Jawa Tengah"         "Jawa Timur"          "Kalimantan Barat"   
## [13] "Kalimantan Selatan"  "Kalimantan Tengah"   "Kalimantan Timur"   
## [16] "Kalimantan Utara"    "Kepulauan Riau"      "Lampung"            
## [19] "Maluku"              "Maluku Utara"        "Nusa Tenggara Barat"
## [22] "Nusa Tenggara Timur" "Papua"               "Papua Barat"        
## [25] "Riau"                "Sulawesi Barat"      "Sulawesi Selatan"   
## [28] "Sulawesi Tengah"     "Sulawesi Tenggara"   "Sulawesi Utara"     
## [31] "Sumatera Barat"      "Sumatera Selatan"    "Sumatera Utara"     
## [34] "Yogyakarta"
library(dplyr)

# 1. Tambah kolom key di data stunting
data <- data %>%
  mutate(prov_key = provinsi)

# 2. Sesuaikan nama yang beda penulisannya
data$prov_key[data$provinsi == "KEP, BANGKA BELITUNG"] <- "Bangka Belitung"
data$prov_key[data$provinsi == "KEP, RIAU"]             <- "Kepulauan Riau"
data$prov_key[data$provinsi == "DKI JAKARTA"]           <- "Jakarta Raya"
data$prov_key[data$provinsi == "DI YOGYAKARTA"]         <- "Yogyakarta"

# 3. Provinsi pemekaran Papua → digabung ke provinsi lama
data$prov_key[data$provinsi %in% c("PAPUA SELATAN",
                                   "PAPUA TENGAH",
                                   "PAPUA PEGUNUNGAN")] <- "Papua"
data$prov_key[data$provinsi == "PAPUA BARAT DAYA"]      <- "Papua Barat"

# 4. Samakan kapital dengan shapefile
data$prov_key          <- toupper(data$prov_key)
indonesia_map$prov_key <- toupper(indonesia_map$NAME_1)
sort(unique(data$prov_key))
##  [1] "ACEH"                "BALI"                "BANGKA BELITUNG"    
##  [4] "BANTEN"              "BENGKULU"            "GORONTALO"          
##  [7] "JAKARTA RAYA"        "JAMBI"               "JAWA BARAT"         
## [10] "JAWA TENGAH"         "JAWA TIMUR"          "KALIMANTAN BARAT"   
## [13] "KALIMANTAN SELATAN"  "KALIMANTAN TENGAH"   "KALIMANTAN TIMUR"   
## [16] "KALIMANTAN UTARA"    "KEPULAUAN RIAU"      "LAMPUNG"            
## [19] "MALUKU"              "MALUKU UTARA"        "NUSA TENGGARA BARAT"
## [22] "NUSA TENGGARA TIMUR" "PAPUA"               "PAPUA BARAT"        
## [25] "RIAU"                "SULAWESI BARAT"      "SULAWESI SELATAN"   
## [28] "SULAWESI TENGAH"     "SULAWESI TENGGARA"   "SULAWESI UTARA"     
## [31] "SUMATERA BARAT"      "SUMATERA SELATAN"    "SUMATERA UTARA"     
## [34] "YOGYAKARTA"
sort(unique(indonesia_map$prov_key))
##  [1] "ACEH"                "BALI"                "BANGKA BELITUNG"    
##  [4] "BANTEN"              "BENGKULU"            "GORONTALO"          
##  [7] "JAKARTA RAYA"        "JAMBI"               "JAWA BARAT"         
## [10] "JAWA TENGAH"         "JAWA TIMUR"          "KALIMANTAN BARAT"   
## [13] "KALIMANTAN SELATAN"  "KALIMANTAN TENGAH"   "KALIMANTAN TIMUR"   
## [16] "KALIMANTAN UTARA"    "KEPULAUAN RIAU"      "LAMPUNG"            
## [19] "MALUKU"              "MALUKU UTARA"        "NUSA TENGGARA BARAT"
## [22] "NUSA TENGGARA TIMUR" "PAPUA"               "PAPUA BARAT"        
## [25] "RIAU"                "SULAWESI BARAT"      "SULAWESI SELATAN"   
## [28] "SULAWESI TENGAH"     "SULAWESI TENGGARA"   "SULAWESI UTARA"     
## [31] "SUMATERA BARAT"      "SUMATERA SELATAN"    "SUMATERA UTARA"     
## [34] "YOGYAKARTA"
map_data <- indonesia_map %>%
  left_join(data, by = "prov_key")
library(ggplot2)

ggplot(map_data) +
  geom_sf(aes(fill = status_stunting)) +
  scale_fill_gradient(low = "yellow", high = "darkred") +
  theme_minimal() +
  labs(
    title = "Peta Persentase Stunting per Provinsi, 2024",
    fill  = "% Stunting"
  )

Pie chart 3D

library(plotrix)
## Warning: package 'plotrix' was built under R version 4.4.3
## 
## Attaching package: 'plotrix'
## The following object is masked from 'package:terra':
## 
##     rescale
# 1. Buat tabel frekuensi Y
STUNTING <- table(Dataset_Stunting$`Stunting (0/1)`)

# Pastikan urutan kategori adalah 0 (rendah) lalu 1 (tinggi)
STUNTING <- STUNTING[c("0", "1")]

# 2. Label kategori
kat <- c("Stunting Rendah = ", "Stunting Tinggi = ")

# 3. Hitung persentase
persentase <- round(STUNTING / sum(STUNTING) * 100)

# 4. Gabungkan label + persentase
label_pie <- paste0(kat, persentase, "%")

# 5. Pie 3D
pie3D(STUNTING,
      labels = label_pie,
      explode = 0.05,
      main = "Persentase Provinsi berdasarkan Status Stunting tahun 2024")

Barplot % stunting per provinsi

ggplot(data, aes(x = reorder(provinsi, status_stunting),
                 y = status_stunting)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(title = "Persentase Stunting per Provinsi (2024)",
       x = "Provinsi", y = "% Stunting") +
  theme_minimal()

Model regresi logistik biner

model_logit <- glm(
  stunting_0_1 ~ kedalaman_kemiskinan_percent +
    rls_perempuan + sanitasi_layak_percent +
    air_minum_layak_percent + imunisasi_lengkap_percent +
    asi_eksklusif_percent,
  data = data,
  family = binomial(link = "logit")
)

summary(model_logit)
## 
## Call:
## glm(formula = stunting_0_1 ~ kedalaman_kemiskinan_percent + rls_perempuan + 
##     sanitasi_layak_percent + air_minum_layak_percent + imunisasi_lengkap_percent + 
##     asi_eksklusif_percent, family = binomial(link = "logit"), 
##     data = data)
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                  15.9002620 11.3117662   1.406   0.1598  
## kedalaman_kemiskinan_percent  1.6393643  0.9757740   1.680   0.0929 .
## rls_perempuan                -0.7055246  0.6579359  -1.072   0.2836  
## sanitasi_layak_percent        0.1383507  0.1302126   1.062   0.2880  
## air_minum_layak_percent       0.0005545  0.0751338   0.007   0.9941  
## imunisasi_lengkap_percent    -0.1515265  0.0707883  -2.141   0.0323 *
## asi_eksklusif_percent        -0.1832698  0.0945016  -1.939   0.0525 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 47.398  on 37  degrees of freedom
## Residual deviance: 28.796  on 31  degrees of freedom
## AIC: 42.796
## 
## Number of Fisher Scoring iterations: 7

Uji signifikansi keseluruhan model (Omnibus LRT, G²)

llh_full <- logLik(model_logit)
llh_null <- logLik(update(model_logit, . ~ 1))

G2    <- -2 * as.numeric(llh_null - llh_full)
df_G2 <- model_logit$rank - 1
p_G2  <- 1 - pchisq(G2, df = df_G2)

G2
## [1] 18.60155
df_G2
## [1] 6
p_G2
## [1] 0.004892253
qchisq(0.90, df = 6)
## [1] 10.64464

Goodness-of-Fit model logistik (Hosmer–Lemeshow)

library(ResourceSelection)
y_num <- as.numeric(as.character(data$stunting_0_1))

hoslem.test(y_num, fitted(model_logit))
## 
##  Hosmer and Lemeshow goodness of fit (GOF) test
## 
## data:  y_num, fitted(model_logit)
## X-squared = 4.2113, df = 8, p-value = 0.8376

Cek multikolinearitas antar prediktor (VIF)

library(car)
model_lm_vif <- lm(
  as.numeric(stunting_0_1) ~ kedalaman_kemiskinan_percent +
    rls_perempuan + sanitasi_layak_percent +
    air_minum_layak_percent + imunisasi_lengkap_percent +
    asi_eksklusif_percent,
  data = data
)

vif(model_lm_vif)
## kedalaman_kemiskinan_percent                rls_perempuan 
##                     2.763133                     3.290841 
##       sanitasi_layak_percent      air_minum_layak_percent 
##                     7.685659                     3.082817 
##    imunisasi_lengkap_percent        asi_eksklusif_percent 
##                     2.162206                     1.397445

Generate fitted probability (yp_hat)

yp_hat <- fitted(model_logit)
yp_hat
##          1          2          3          4          5          6          7 
## 0.99920979 0.98748902 0.61745533 0.93847549 0.79374336 0.88162934 0.43142876 
##          8          9         10         11         12         13         14 
## 0.33545905 0.81187323 0.49152882 0.10879122 0.09410344 0.26501737 0.07176323 
##         15         16         17         18         19         20         21 
## 0.42049558 0.84371904 0.23156971 0.41855310 0.74926188 0.84583679 0.92203098 
##         22         23         24         25         26         27         28 
## 0.41433748 0.13582973 0.55340420 0.64701204 0.91247496 0.60132839 0.87683275 
##         29         30         31         32         33         34         35 
## 0.97290088 0.82708957 0.98615992 0.83067703 0.99989521 0.99964350 0.99999175 
##         36         37         38 
## 0.98437333 0.99995604 0.99865868
data$yp_hat <- yp_hat

Evaluasi performa → ROC–AUC

library(pROC)
roc_obj <- roc(y_num, yp_hat)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
auc(roc_obj)
## Area under the curve: 0.8878
plot(roc_obj,
     main = "ROC Curve Model Regresi Logistik Stunting")

Odds Ratio (OR) + CI → interpretasi pengaruh X

# OR
OR <- exp(coef(model_logit))
OR
##                  (Intercept) kedalaman_kemiskinan_percent 
##                 8.042592e+06                 5.151893e+00 
##                rls_perempuan       sanitasi_layak_percent 
##                 4.938494e-01                 1.148378e+00 
##      air_minum_layak_percent    imunisasi_lengkap_percent 
##                 1.000555e+00                 8.593951e-01 
##        asi_eksklusif_percent 
##                 8.325435e-01
# Confidence interval 95% untuk OR
exp(confint.default(model_logit))
##                                    2.5 %       97.5 %
## (Intercept)                  0.001891486 3.419707e+16
## kedalaman_kemiskinan_percent 0.761003194 3.487765e+01
## rls_perempuan                0.136006141 1.793208e+00
## sanitasi_layak_percent       0.889708585 1.482252e+00
## air_minum_layak_percent      0.863548650 1.159297e+00
## imunisasi_lengkap_percent    0.748062325 9.872973e-01
## asi_eksklusif_percent        0.691778427 1.001952e+00

Marginal Effect Analysis

meff <- margins(model_logit)
summary(meff)
##                        factor     AME     SE       z      p   lower   upper
##       air_minum_layak_percent  0.0001 0.0093  0.0074 0.9941 -0.0181  0.0182
##         asi_eksklusif_percent -0.0226 0.0092 -2.4415 0.0146 -0.0407 -0.0045
##     imunisasi_lengkap_percent -0.0187 0.0060 -3.1009 0.0019 -0.0305 -0.0069
##  kedalaman_kemiskinan_percent  0.2019 0.1008  2.0041 0.0451  0.0044  0.3994
##                 rls_perempuan -0.0869 0.0762 -1.1407 0.2540 -0.2362  0.0624
##        sanitasi_layak_percent  0.0170 0.0149  1.1429 0.2531 -0.0122  0.0463
plot(meff)

Evaluasi lanjutan → Confusion Matrix

library(caret)
cutoff <-0.5   
cutoff
## [1] 0.5
data$pred_label <- ifelse(yp_hat > cutoff, 1, 0)

conf_mat <- confusionMatrix(
  factor(data$pred_label, levels = c(0, 1)),
  factor(y_num,           levels = c(0, 1))
)

conf_mat
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  9  3
##          1  3 23
##                                           
##                Accuracy : 0.8421          
##                  95% CI : (0.6875, 0.9398)
##     No Information Rate : 0.6842          
##     P-Value [Acc > NIR] : 0.02268         
##                                           
##                   Kappa : 0.6346          
##                                           
##  Mcnemar's Test P-Value : 1.00000         
##                                           
##             Sensitivity : 0.7500          
##             Specificity : 0.8846          
##          Pos Pred Value : 0.7500          
##          Neg Pred Value : 0.8846          
##              Prevalence : 0.3158          
##          Detection Rate : 0.2368          
##    Detection Prevalence : 0.3158          
##       Balanced Accuracy : 0.8173          
##                                           
##        'Positive' Class : 0               
##