Ringkasan analisis: 32 skenario SAE — 2 metode seleksi aux (Backward vs Top-n/10) × 4 model (EBLUP, GLMM-Logit, EB Beta-Binomial, HB Beta) × 4 partisi (All, RSE Natural Break, RSE Equal Size, Cluster-Aux). Baseline: Direct Estimator.


1. Library & Cek Paket

Library

# Load urutan: MASS & car DULU, baru dplyr (cegah masking select/recode)
library(MASS)
library(car)
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(patchwork)
library(sae)        # mseFH
library(glmmTMB)    # GLMM binomial
library(classInt)   # Jenks Natural Breaks
library(factoextra)
library(knitr)
library(kableExtra)

# Ikat namespace kritis
select <- dplyr::select
recode <- dplyr::recode

Cek JAGS & saeHB

has_jags <- tryCatch({
  library(rjags); library(saeHB); TRUE
}, error = function(e) {
  message("saeHB/JAGS tidak tersedia — skenario HB Beta akan fallback ke direct.")
  FALSE
})
cat("Status JAGS/saeHB:", ifelse(has_jags, "Tersedia", "Tidak tersedia"), "\n")
## Status JAGS/saeHB: Tersedia

2. Load Data & Persiapan

Load Data

Estimasi_531  <- read_excel("C:/Users/User/Downloads/Hasil_sulawesi_KabKota.xlsx")
Podes_kab2024 <- read_excel("C:/Users/User/Downloads/Podes_kab_lengkap.xlsx")
Data_Penduduk <- read_excel("C:/Users/User/Downloads/Data_Penduduk.xlsx")

Estimasi_531  <- Estimasi_531  %>% mutate(Kako = sprintf("%04d", Kako))
Podes_kab2024 <- Podes_kab2024 %>% mutate(kode_kab = sprintf("%04s", kode_kab))

df_gabungan <- Estimasi_531 %>%
  left_join(Podes_kab2024, by = c("Kako" = "kode_kab")) %>%
  dplyr::select(Kako, Estimasi, RSE, starts_with("X"))

df_gabungan$Kako <- as.numeric(df_gabungan$Kako)
df_final <- df_gabungan %>% left_join(Data_Penduduk, by = "Kako")

cat("Jumlah domain:", nrow(df_final), "\n")
## Jumlah domain: 81
cat("Jumlah variabel:", ncol(df_final), "\n")
## Jumlah variabel: 112

Variabel Rasio

df_final <- df_final %>%
  mutate(
    X6  = X6_jumlah  / Jumlah_Penduduk * 1000,
    X7  = X7_jumlah  / Jumlah_Penduduk * 1000,
    X10 = X10_jumlah / Jumlah_Penduduk * 1000,
    X33 = X33_jumlah / Jumlah_Penduduk * 1000,
    X11 = X11_jumlah / Jumlah_P * 1000,
    X12 = X12_jumlah / Jumlah_P * 1000,
    X27 = X27_jumlah / Jumlah_P * 1000,
    X28 = X28_jumlah / Jumlah_P * 1000,
    X29 = X29_jumlah / Jumlah_P * 1000,
    X30 = X30_jumlah / Jumlah_P * 1000,
    X31 = X31_jumlah / Jumlah_P * 1000,
    X35 = X35_jumlah / Jumlah_P * 1000,
    X36 = X36_jumlah / Jumlah_P * 1000
  )

Pre-filter Kandidat Global

eps_logit <- 1e-4

df_base <- df_final %>%
  mutate(
    vardir  = (RSE / 100 * Estimasi)^2,
    y_prop  = pmax(pmin(Estimasi / 100, 1 - eps_logit), eps_logit),
    y_logit = log(y_prop / (1 - y_prop))
  ) %>%
  as.data.frame()

# ── Kandidat aux: HANYA kolom yang diawali "X" ───────────────────────────
# RSE_direct_col, grup_jenks, cluster_*, dll yang ditambahkan kemudian
# otomatis TIDAK masuk karena tidak berawalan X
kandidat_vars <- names(df_base)[startsWith(names(df_base), "X")]

# ── excl_always: otomatis semua kolom SELAIN X* ──────────────────────────
# Diupdate di setiap fungsi seleksi agar mencakup kolom baru yang
# ditambahkan setelah chunk ini (RSE_direct_col, grup_*, cluster_*)
excl_always <- setdiff(names(df_base), kandidat_vars)

cat("Variabel kandidat (", length(kandidat_vars), "):\n",
    paste(kandidat_vars, collapse = ", "), "\n")
## Variabel kandidat ( 119 ):
##  X1, X2, X8, X9, X13, X14, X15, X16, X17, X18, X19, X21, X22, X23, X24, X25, X26, X32, X37, X38, X39, X40, X41, X43, X44, X45, X1_jumlah, X1_mean, X2_jumlah, X2_mean, X8_jumlah, X8_mean, X9_jumlah, X9_mean, X13_jumlah, X13_mean, X14_jumlah, X14_mean, X15_jumlah, X15_mean, X16_jumlah, X16_mean, X17_jumlah, X17_mean, X18_jumlah, X18_mean, X19_jumlah, X19_mean, X21_jumlah, X21_mean, X22_jumlah, X22_mean, X23_jumlah, X23_mean, X24_jumlah, X24_mean, X25_jumlah, X25_mean, X26_jumlah, X26_mean, X32_jumlah, X32_mean, X34_jumlah, X34_mean, X37_jumlah, X37_mean, X38_jumlah, X38_mean, X39_jumlah, X39_mean, X40_jumlah, X40_mean, X41_jumlah, X41_mean, X43_jumlah, X43_mean, X44_jumlah, X44_mean, X45_jumlah, X45_mean, X6_jumlah, X6_mean, X7_jumlah, X7_mean, X10_jumlah, X10_mean, X11_jumlah, X11_mean, X12_jumlah, X12_mean, X27_jumlah, X27_mean, X28_jumlah, X28_mean, X29_jumlah, X29_mean, X30_jumlah, X30_mean, X31_jumlah, X31_mean, X33_jumlah, X33_mean, X35_jumlah, X35_mean, X36_jumlah, X36_mean, X6, X7, X10, X33, X11, X12, X27, X28, X29, X30, X31, X35, X36
cat("\nTotal dikecualikan:", length(excl_always), "kolom\n")
## 
## Total dikecualikan: 9 kolom

3. Fungsi Pembantu

Seleksi A1: Backward

# Helper: pilih 1 representatif terbaik per grup X (X19, X19_mean, X19_jumlah → 1)
# Pemilihan berdasarkan |cor| tertinggi ke y_col pada data segmen berjalan
pilih_per_grup <- function(df, kandidat, y_col) {
  get_base <- function(x) sub("(_jumlah|_mean)$", "", x)
  bases    <- unique(sapply(kandidat, get_base))
  terpilih <- c()
  for (b in bases) {
    anggota <- kandidat[get_base(kandidat) == b]
    if (length(anggota) == 1) { terpilih <- c(terpilih, anggota); next }
    cor_v   <- sapply(anggota, function(v)
      abs(cor(df[[v]], df[[y_col]], use = "complete.obs")))
    cor_v   <- cor_v[!is.na(cor_v)]
    if (length(cor_v) == 0) { terpilih <- c(terpilih, anggota[1]); next }
    terpilih <- c(terpilih, names(which.max(cor_v)))
  }
  unique(terpilih)
}

seleksi_backward <- function(df, y_col, excl_cols = NULL, r2_cap = 0.8) {
  if (is.null(excl_cols)) excl_cols <- names(df)[!startsWith(names(df), "X")]
  n_domain <- nrow(df)
  max_top  <- max(1L, floor(n_domain / 10L))

  keep <- sapply(df, function(x) length(unique(na.omit(x))) > 1)
  df   <- df[, keep]

  kandidat <- setdiff(names(df), excl_cols)
  kandidat <- kandidat[startsWith(kandidat, "X")]
  if (length(kandidat) == 0) return(character(0))

  # Pilih 1 representatif per grup X (sesuai y_col & data segmen ini)
  kandidat <- pilih_per_grup(df, kandidat, y_col)
  if (length(kandidat) == 0) return(character(0))

  cor_v <- sapply(df[kandidat], function(x)
    abs(cor(x, df[[y_col]], use = "complete.obs")))
  cor_v <- cor_v[!is.na(cor_v)]
  if (length(cor_v) == 0) return(character(0))
  vars  <- names(sort(cor_v, decreasing = TRUE))[1:min(max_top, length(cor_v))]
  vars  <- vars[!is.na(vars)]

  step_m <- try(stepAIC(
    lm(as.formula(paste(y_col, "~", paste(vars, collapse = "+"))), data = df),
    direction = "backward", trace = FALSE), silent = TRUE)
  if (!inherits(step_m, "try-error")) {
    sv <- names(coef(step_m))[-1]
    if (length(sv) > 0) vars <- sv
  }

  repeat {
    if (length(vars) <= 1) break
    m <- try(lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df),silent=TRUE)
    if (inherits(m,"try-error")) { vars <- vars[1]; break }
    v <- try(vif(m), silent=TRUE)
    if (inherits(v,"try-error")) break
    v_ok <- v[!is.na(v)]
    if (length(v_ok)==0 || all(v_ok<=10)) break
    vars <- vars[vars != names(which.max(v_ok))]
  }

  if (length(vars) > 1) {
    m <- lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df)
    if (summary(m)$r.squared > r2_cap) vars <- vars[1]
  }
  return(vars)
}

Seleksi A2: Top-n/10

seleksi_topn <- function(df, y_col, excl_cols = NULL, r2_cap = 0.8) {
  if (is.null(excl_cols)) excl_cols <- names(df)[!startsWith(names(df), "X")]
  n_domain <- nrow(df)
  max_top  <- min(max(1L, floor(n_domain / 10L)), 8L)

  keep <- sapply(df, function(x) length(unique(na.omit(x))) > 1)
  df   <- df[, keep]

  kandidat <- setdiff(names(df), excl_cols)
  kandidat <- kandidat[startsWith(kandidat, "X")]
  if (length(kandidat) == 0) return(character(0))

  # Pilih 1 representatif per grup X
  kandidat <- pilih_per_grup(df, kandidat, y_col)
  if (length(kandidat) == 0) return(character(0))

  cor_v <- sapply(df[kandidat], function(x)
    abs(cor(x, df[[y_col]], use = "complete.obs")))
  cor_v <- cor_v[!is.na(cor_v)]
  if (length(cor_v) == 0) return(character(0))
  vars  <- names(sort(cor_v, decreasing = TRUE))[1:min(max_top, length(cor_v))]
  vars  <- vars[!is.na(vars)]

  repeat {
    if (length(vars) <= 1) break
    m <- try(lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df),silent=TRUE)
    if (inherits(m,"try-error")) { vars <- vars[1]; break }
    v <- try(vif(m), silent=TRUE)
    if (inherits(v,"try-error")) break
    v_ok <- v[!is.na(v)]
    if (length(v_ok)==0 || all(v_ok<=10)) break
    vars <- vars[vars != names(which.max(v_ok))]
  }

  if (length(vars) > 1) {
    m <- lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df)
    if (summary(m)$r.squared > r2_cap) vars <- vars[1]
  }
  return(vars)
}

Model: EBLUP

run_eblup <- function(df, vars) {
  df$vardir <- pmax(df$vardir, 1e-10)

  # Trim ke kolom yang dibutuhkan mseFH saja (seperti pola SAE_Lengkap_Fixed2)
  # Kolom ekstra (grup_jenks, cluster_*, RSE_direct_col, dll) dikeluarkan
  # agar mseFH tidak terganggu
  df_m        <- df[, c("Estimasi", "vardir", vars), drop = FALSE]

  # Scale per kolom — lebih aman dari scale() bulk untuk kolom near-constant
  for (v in vars) {
    mu <- mean(df_m[[v]], na.rm = TRUE)
    sd <- sd(df_m[[v]],   na.rm = TRUE)
    df_m[[v]] <- if (is.na(sd) || sd == 0) 0 else (df_m[[v]] - mu) / sd
    df_m[[v]][is.na(df_m[[v]]) | is.nan(df_m[[v]])] <- 0
  }

  form <- as.formula(paste("Estimasi ~", paste(vars, collapse = "+")))
  m    <- try(mseFH(form, vardir = vardir, data = df_m), silent = TRUE)

  if (inherits(m, "try-error") || is.null(m$est) || is.null(m$mse)) {
    df$y_eblup <- df$Estimasi
    df$mse     <- df$vardir
  } else {
    ev  <- as.numeric(m$est$eblup)
    mv  <- as.numeric(m$mse)
    # Per-domain fallback: domain NaN/NA → pakai direct, bukan fallback seluruh segmen
    bad <- is.na(ev) | is.nan(ev) | is.na(mv) | is.nan(mv)
    if (any(bad)) {
      message("  EBLUP: ", sum(bad), " domain NaN → fallback direct per domain")
      ev[bad] <- df$Estimasi[bad]
      mv[bad] <- df$vardir[bad]
    }
    df$y_eblup <- ev
    df$mse     <- mv
  }

  df$mse        <- pmax(df$mse, 0)
  df$RSE_direct <- sqrt(df$vardir) / pmax(df$Estimasi, 1e-6) * 100
  df$RMSE_eblup <- sqrt(df$mse)
  df$RSE_eblup  <- df$RMSE_eblup / pmax(abs(df$y_eblup), 1e-6) * 100
  df
}

Model: GLMM (glmmTMB)

run_glmm <- function(df, vars) {
  eps        <- 1e-4
  df$y_prop  <- pmax(pmin(df$Estimasi / 100, 1 - eps), eps)
  se_prop    <- pmax(df$RSE / 100 * df$y_prop, eps)
  df$n_eff   <- pmax(as.integer(round(df$y_prop * (1 - df$y_prop) / se_prop^2)), 2L)
  df$y_count <- pmin(as.integer(round(df$y_prop * df$n_eff)), df$n_eff)

  df[vars] <- as.data.frame(scale(df[vars]))
  df[vars][is.nan(as.matrix(df[vars]))] <- 0

  form <- as.formula(paste(
    "cbind(y_count, n_eff - y_count) ~",
    paste(vars, collapse = " + "),
    "+ (1 | Kako)"
  ))

  ctrl <- glmmTMBControl(optCtrl = list(iter.max = 400, eval.max = 600))
  m    <- try(glmmTMB(form, data = df, family = binomial, control = ctrl),
              silent = TRUE)

  if (inherits(m, "try-error") || is.null(m)) {
    message("  GLMM fit gagal → fallback direct")
    df$y_glmm   <- df$Estimasi
    df$mse_glmm <- df$vardir
  } else {
    df$y_glmm <- plogis(predict(m, type = "link", re.form = NULL)) * 100

    # Jackknife — seluruh loop dibungkus tryCatch
    # Jika salah satu iterasi fatal crash, fallback ke MSE empiris
    mse_v <- tryCatch({
      n_d      <- nrow(df)
      jack_mat <- matrix(NA_real_, nrow = n_d, ncol = n_d)

      for (i in seq_len(n_d)) {
        df_j <- df[-i, ]
        m_j  <- try(glmmTMB(form, data = df_j, family = binomial,
                             control = glmmTMBControl(
                               optCtrl = list(iter.max = 300, eval.max = 400))),
                    silent = TRUE)
        if (!inherits(m_j, "try-error")) {
          pj <- try(plogis(predict(m_j, newdata = df, type = "link",
                                    re.form = NA,
                                    allow.new.levels = TRUE)) * 100,
                    silent = TRUE)
          if (!inherits(pj, "try-error") && length(pj) == n_d)
            jack_mat[i, ] <- pj
        }
      }

      vapply(seq_len(n_d), function(j) {
        ev <- jack_mat[, j][!is.na(jack_mat[, j])]
        if (length(ev) >= 2) {
          tb <- mean(ev)
          ((length(ev) - 1) / length(ev)) * mean((ev - tb)^2)
        } else {
          (df$RSE[j] / 100 * df$y_glmm[j])^2   # fallback per-domain
        }
      }, numeric(1))

    }, error = function(e) {
      # Jackknife crash total → fallback ke MSE empiris semua domain
      message("  Jackknife crash: ", e$message, " → fallback MSE empiris")
      (df$RSE / 100 * df$y_glmm)^2
    })

    df$mse_glmm <- pmax(mse_v, 0)
  }

  df$RSE_direct <- df$RSE
  df$RMSE_glmm  <- sqrt(df$mse_glmm)
  df$RSE_glmm   <- df$RMSE_glmm / pmax(df$y_glmm, 1e-6) * 100
  df
}

Model: EB Beta-Binomial

#' EB Beta-Binomial dengan covariate:
#' (1) Regresi logit(p) ~ vars → mu_i (domain-specific prior mean)
#' (2) Estimasi phi (precision) via MOM dari {p_i}
#' (3) EB: p_hat_i = (a_i + y_i) / (a_i + b_i + n_eff_i) x 100
#' (4) Var posterior analytik → RSE_eb
run_eb_beta <- function(df, vars) {
  eps       <- 1e-4
  df$y_prop <- pmax(pmin(df$Estimasi / 100, 1 - eps), eps)

  se_prop    <- pmax(df$RSE / 100 * df$y_prop, eps)
  df$n_eff   <- pmax(as.integer(round(df$y_prop * (1 - df$y_prop) / se_prop^2)), 2L)
  df$y_count <- pmin(as.integer(round(df$y_prop * df$n_eff)), df$n_eff)

  df$y_logit_eb <- log(df$y_prop / (1 - df$y_prop))

  df[vars] <- as.data.frame(scale(df[vars]))
  df[vars][is.nan(as.matrix(df[vars]))] <- 0

  # Regresi untuk mu_i (prior mean per domain)
  form_reg <- as.formula(paste("y_logit_eb ~", paste(vars, collapse = "+")))
  reg      <- try(lm(form_reg, data = df), silent = TRUE)
  mu_i     <- if (inherits(reg, "try-error")) {
    df$y_prop
  } else {
    pmax(pmin(plogis(fitted(reg)), 1 - eps), eps)
  }

  # MOM untuk phi (precision)
  p_bar  <- mean(df$y_prop)
  var_p  <- var(df$y_prop)
  phi    <- max((p_bar * (1 - p_bar) / var_p) - 1, 0.01)

  a_i <- mu_i * phi
  b_i <- (1 - mu_i) * phi
  n_i <- df$n_eff
  y_i <- df$y_count

  # EB estimator
  a_post   <- a_i + y_i
  b_post   <- b_i + (n_i - y_i)
  denom    <- a_i + b_i + n_i
  p_hat_eb <- a_post / denom

  df$y_eb_pct <- p_hat_eb * 100

  # Var posterior analytik (Beta conjugate)
  var_post      <- (a_post * b_post) / (denom^2 * (denom + 1))
  df$var_eb_pct <- var_post * 100^2

  df$RSE_direct <- df$RSE
  df$RMSE_eb    <- sqrt(df$var_eb_pct)
  df$RSE_eb     <- df$RMSE_eb / pmax(df$y_eb_pct, 1e-6) * 100
  df
}

Model: HB Beta

run_hbbeta <- function(df, vars,
                       has_jags = get("has_jags", envir = .GlobalEnv)) {
  if (!has_jags) {
    df$y_hb_pct <- df$Estimasi; df$sd_hb <- NA_real_; df$RSE_hb <- df$RSE
    return(df)
  }
  eps       <- 1e-4
  df$y_prop <- pmax(pmin(df$Estimasi / 100, 1 - eps), eps)
  df[vars]  <- as.data.frame(scale(df[vars]))
  df[vars][is.nan(as.matrix(df[vars]))] <- 0
  form_hb   <- as.formula(paste("y_prop ~", paste(vars, collapse = "+")))
  hb <- try(saeHB::Beta(
    formula = form_hb, data = df,
    iter.update = 15, iter.mcmc = 4000, thin = 10, burn.in = 1000
  ), silent = TRUE)
  if (inherits(hb, "try-error") || is.null(hb$Est)) {
    message("  HB Beta gagal → fallback direct")
    df$y_hb_pct <- df$Estimasi; df$sd_hb <- NA_real_; df$RSE_hb <- df$RSE
  } else {
    df$y_hb_pct <- hb$Est$MEAN * 100
    df$sd_hb    <- hb$Est$SD
    df$RSE_hb   <- df$sd_hb / pmax(df$y_hb_pct / 100, 1e-6) * 100
  }
  df
}

Fungsi Plot & Evaluasi

# ── Plot RSE line (2 metode) ─────────────────────────────────────────────
plot_rse <- function(data, col1, col2, lab1, lab2, title) {
  miss <- setdiff(c(col1, col2), names(data))
  if (length(miss) > 0) { message("Kolom tidak ada: ", paste(miss)); return(invisible(NULL)) }
  df_plt <- data %>%
    dplyr::select(Kako, all_of(c(col1, col2))) %>%
    pivot_longer(-Kako, names_to = "Metode", values_to = "RSE") %>%
    mutate(Metode = case_when(Metode == col1 ~ lab1, Metode == col2 ~ lab2, TRUE ~ Metode),
           Kako = as.character(Kako)) %>%
    filter(!is.na(RSE))
  ggplot(df_plt, aes(x = Kako, y = RSE, group = Metode, color = Metode)) +
    geom_line(linewidth = 0.8, alpha = 0.85) +
    geom_point(size = 1.6) +
    geom_hline(yintercept = 25, linetype = "dashed", color = "#c0392b", linewidth = 0.9) +
    annotate("text", x = 1, y = 26.5, label = "Batas 25%", color = "#c0392b",
             hjust = 0, size = 3) +
    scale_color_manual(values = c("#2980b9", "#27ae60")) +
    labs(title = title, x = "Kabupaten/Kota", y = "RSE (%)", color = NULL) +
    theme_minimal(base_size = 11) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 6),
          legend.position = "top", plot.title = element_text(face = "bold", size = 12))
}

# ── Plot HB Beta Posterior (dot + CI per domain) ─────────────────────────
plot_hb_posterior <- function(df, title, group_col = NULL) {
  eps   <- 1e-4
  df_p  <- df %>%
    filter(!is.na(y_hb_pct) & !is.na(sd_hb)) %>%
    mutate(
      ci_lo  = pmax((y_hb_pct / 100 - 1.96 * sd_hb) * 100, 0),
      ci_hi  = pmin((y_hb_pct / 100 + 1.96 * sd_hb) * 100, 100),
      Domain = reorder(as.character(Kako), y_hb_pct)
    )
  if (!is.null(group_col) && group_col %in% names(df_p)) {
    df_p$Grup <- df_p[[group_col]]
  } else {
    df_p$Grup <- "Semua Domain"
  }

  p <- ggplot(df_p, aes(x = Domain)) +
    geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi, color = Grup),
                  width = 0.3, alpha = 0.55) +
    geom_point(aes(y = y_hb_pct, color = Grup), size = 2) +
    geom_point(aes(y = Estimasi), color = "#c0392b", size = 1.2, alpha = 0.6, shape = 4) +
    geom_hline(yintercept = mean(df_p$y_hb_pct, na.rm = TRUE),
               linetype = "dashed", color = "#2ecc71", linewidth = 0.8) +
    scale_color_manual(values = c("#2980b9","#e67e22","#8e44ad","#16a085")) +
    labs(title    = title,
         subtitle = "Titik biru = HB | Error bar = 95% CI | Silang merah = Direct | Garis hijau = Rata-rata HB",
         x = "Kabupaten/Kota (urut HB)", y = "Estimasi (%)", color = "Grup") +
    theme_minimal(base_size = 10) +
    theme(axis.text.x  = element_text(angle = 90, vjust = 0.5, size = 5),
          legend.position = "top",
          plot.title      = element_text(face = "bold", size = 12),
          plot.subtitle   = element_text(size = 9))
  p
}

# ── Utilitas evaluasi ────────────────────────────────────────────────────
pct_ok     <- function(x)     round(mean(x < 25, na.rm = TRUE) * 100, 1)
pct_ok_15  <- function(x)     round(mean(x < 15, na.rm = TRUE) * 100, 1)
cv_mean    <- function(x)     round(mean(x, na.rm = TRUE), 2)

tabel_rse <- function(data, var_rse, label) {
  data.frame(
    Skenario      = label,
    `RSE >= 25`   = sum(data[[var_rse]] >= 25, na.rm = TRUE),
    `RSE < 25`    = sum(data[[var_rse]] < 25,  na.rm = TRUE),
    `% < 25%`     = pct_ok(data[[var_rse]]),
    `% < 15%`     = pct_ok_15(data[[var_rse]]),
    `CV Mean`     = cv_mean(data[[var_rse]]),
    check.names   = FALSE
  )
}

metrik <- function(est_model, est_direct, label) {
  data.frame(
    Skenario  = label,
    `RB (%)`  = round(mean((est_model - est_direct) / est_direct, na.rm=TRUE)*100, 3),
    `RMSE`    = round(sqrt(mean((est_model - est_direct)^2, na.rm=TRUE)), 4),
    check.names = FALSE
  )
}

# ── Fungsi utama: jalankan model pada list segmen ────────────────────────
# aux_fn  : seleksi_backward atau seleksi_topn
# model_fn: run_eblup / run_glmm / run_eb_beta / run_hbbeta
# y_col   : "Estimasi" (EBLUP) atau "y_logit" (GLMM/EB/HB)
# Catatan: excl_cols tidak perlu dioper — kedua fungsi seleksi
#          sudah auto-detect kolom non-X secara dinamis
run_on_segments <- function(seg_list, aux_fn, model_fn, y_col,
                             min_n = 4, ...) {
  hasil <- list()
  for (nm in names(seg_list)) {
    df_s <- seg_list[[nm]]
    if (nrow(df_s) < min_n) { message("  Skip ", nm, " (n=", nrow(df_s), ")"); next }
    vars <- tryCatch(
      aux_fn(df_s, y_col = y_col),
      error = function(e) { message("  VarSel error [", nm, "]: ", e$message); character(0) }
    )
    if (length(vars) == 0) {
      message("  Tidak ada var terpilih untuk ", nm, " — skip")
      next
    }
    cat("  [", nm, "]", y_col, "| n =", nrow(df_s),
        "| Vars:", paste(vars, collapse=", "), "\n")
    res <- tryCatch(
      model_fn(df_s, vars, ...),
      error = function(e) {
        message("  Model error [", nm, "]: ", e$message)
        NULL
      }
    )
    if (!is.null(res)) hasil[[nm]] <- res
  }
  if (length(hasil) == 0) return(NULL)
  bind_rows(hasil)
}

4. Pembagian Data

C2 — RSE Natural Break (Jenks)

# RSE_direct_col: hanya untuk pembagian partisi, BUKAN kandidat aux
# Karena nama tidak berawalan "X", otomatis tereksklusi oleh kedua fungsi seleksi
df_base$RSE_direct_col <- sqrt(df_base$vardir) / pmax(df_base$Estimasi, 1e-6) * 100

brk_jenks <- classIntervals(df_base$RSE_direct_col, n = 2, style = "jenks")
df_base$grup_jenks <- cut(df_base$RSE_direct_col,
                          breaks = brk_jenks$brks,
                          labels = c("G1_RSE_Rendah", "G2_RSE_Tinggi"),
                          include.lowest = TRUE)

cat("Cut-off Jenks:", round(brk_jenks$brks[2], 2), "%\n")
## Cut-off Jenks: 53.58 %
table(df_base$grup_jenks)
## 
## G1_RSE_Rendah G2_RSE_Tinggi 
##            62            19

C3 — RSE Equal Size (Median)

med_rse <- median(df_base$RSE_direct_col, na.rm = TRUE)
df_base$grup_equal <- ifelse(df_base$RSE_direct_col <= med_rse,
                              "G1_RSE_Bawah", "G2_RSE_Atas")
cat("Median RSE:", round(med_rse, 2), "%\n")
## Median RSE: 44.36 %
table(df_base$grup_equal)
## 
## G1_RSE_Bawah  G2_RSE_Atas 
##           41           40

C4 — Cluster-Aux (k=2)

# Fungsi helper cluster
make_cluster <- function(df_full, vars_cl, seed = 42) {
  mat <- scale(df_full[, vars_cl, drop = FALSE])
  mat[is.nan(mat)] <- 0
  set.seed(seed)
  km  <- kmeans(mat, centers = 2, nstart = 50, iter.max = 200)
  paste0("Cluster_", km$cluster)
}

# === Global var selection (ALL data) untuk menentukan vars clustering ===

# A1 (Backward) — Normal link
vars_gbk_normal <- seleksi_backward(df_base, y_col = "Estimasi")
cat("Cluster-A1-Normal vars:", paste(vars_gbk_normal, collapse=", "), "\n")
## Cluster-A1-Normal vars: X45, X28
df_base$cluster_bk_normal <- make_cluster(df_base, vars_gbk_normal)

# A2 (TopN) — Normal link
vars_gtn_normal <- seleksi_topn(df_base, y_col = "Estimasi")
cat("Cluster-A2-Normal vars:", paste(vars_gtn_normal, collapse=", "), "\n")
## Cluster-A2-Normal vars: X45, X8, X36_mean, X28, X29_mean, X30_mean, X31_mean
df_base$cluster_tn_normal <- make_cluster(df_base, vars_gtn_normal)

# A1 (Backward) — Logit link
vars_gbk_logit <- seleksi_backward(df_base, y_col = "y_logit")
cat("Cluster-A1-Logit vars:", paste(vars_gbk_logit, collapse=", "), "\n")
## Cluster-A1-Logit vars: X8, X45, X44
df_base$cluster_bk_logit <- make_cluster(df_base, vars_gbk_logit)

# A2 (TopN) — Logit link
vars_gtn_logit <- seleksi_topn(df_base, y_col = "y_logit")
cat("Cluster-A2-Logit vars:", paste(vars_gtn_logit, collapse=", "), "\n")
## Cluster-A2-Logit vars: X8, X45, X44, X39, X36_mean, X30_mean, X28, X1
df_base$cluster_tn_logit <- make_cluster(df_base, vars_gtn_logit)

# Ringkasan cluster
kable(
  data.frame(
    Cluster_Tipe    = c("A1-Normal","A2-Normal","A1-Logit","A2-Logit"),
    Variabel_Global = c(paste(vars_gbk_normal, collapse=", "),
                        paste(vars_gtn_normal, collapse=", "),
                        paste(vars_gbk_logit,  collapse=", "),
                        paste(vars_gtn_logit,  collapse=", "))
  ), caption = "Variabel Global untuk Pembentukan Cluster") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"))
Variabel Global untuk Pembentukan Cluster
Cluster_Tipe Variabel_Global
A1-Normal X45, X28
A2-Normal X45, X8, X36_mean, X28, X29_mean, X30_mean, X31_mean
A1-Logit X8, X45, X44
A2-Logit X8, X45, X44, X39, X36_mean, X30_mean, X28, X1

Distribusi Semua Partisi

p1 <- ggplot(df_base, aes(x = RSE_direct_col, fill = grup_jenks)) +
  geom_histogram(bins = 25, color = "white", alpha = 0.85) +
  geom_vline(xintercept = brk_jenks$brks[2], linetype="dashed", color="#c0392b") +
  scale_fill_manual(values = c("#2980b9","#e67e22")) +
  labs(title = "C2: RSE Natural Break (Jenks)", x = "RSE Direct (%)", y = "n", fill=NULL) +
  theme_minimal(base_size = 11) + theme(legend.position = "top")

p2 <- ggplot(df_base, aes(x = RSE_direct_col, fill = grup_equal)) +
  geom_histogram(bins = 25, color = "white", alpha = 0.85) +
  geom_vline(xintercept = med_rse, linetype="dashed", color="#c0392b") +
  scale_fill_manual(values = c("#2980b9","#e67e22")) +
  labs(title = "C3: RSE Equal Size (Median)", x = "RSE Direct (%)", y = "n", fill=NULL) +
  theme_minimal(base_size = 11) + theme(legend.position = "top")

p3 <- ggplot(df_base, aes(x = Estimasi, fill = cluster_bk_normal)) +
  geom_boxplot(alpha = 0.8) +
  scale_fill_manual(values = c("#2980b9","#27ae60")) +
  labs(title = "C4 Cluster (A1-Normal)", x = "Estimasi (%)", fill=NULL) +
  theme_minimal(base_size = 11) + theme(legend.position = "top")

p4 <- ggplot(df_base, aes(x = Estimasi, fill = cluster_bk_logit)) +
  geom_boxplot(alpha = 0.8) +
  scale_fill_manual(values = c("#2980b9","#27ae60")) +
  labs(title = "C4 Cluster (A1-Logit)", x = "Estimasi (%)", fill=NULL) +
  theme_minimal(base_size = 11) + theme(legend.position = "top")

(p1 | p2) / (p3 | p4)


7. EB Beta-Binomial Empirical Bayes

Skenario 17–24 — EB Beta-Binomial. Regresi logit(p) ~ vars menghasilkan μᵢ (prior mean domain-spesifik). Presisi φ diestimasi via Method of Moments. Estimasi EB: p̂ᵢ = (aᵢ + yᵢ) / (aᵢ + bᵢ + nᵢ). MSE dari variansi posterior Beta analytik.

S17 — Backward · All

cat("=== Skenario 17: EB Backward All ===\n")
## === Skenario 17: EB Backward All ===
df_s17 <- run_on_segments(
  seg_list = list("All" = df_base),
  aux_fn   = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ All ] y_logit | n = 81 | Vars: X8, X45, X44
summary(df_s17[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct        RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.134   Min.   : 22.9   Min.   :21.0  
##  1st Qu.: 6.18   1st Qu.: 6.668   1st Qu.: 38.3   1st Qu.:31.5  
##  Median : 9.32   Median : 9.905   Median : 44.4   Median :35.3  
##  Mean   :10.08   Mean   : 9.410   Mean   : 47.6   Mean   :37.7  
##  3rd Qu.:13.93   3rd Qu.:12.489   3rd Qu.: 53.5   3rd Qu.:40.7  
##  Max.   :24.05   Max.   :19.161   Max.   :101.7   Max.   :83.7
plot_rse(df_s17,"RSE_direct","RSE_eb","Direct","EB-BK All","S17: EB Beta-Binomial Backward — All")
S17: EB Beta Backward All

S17: EB Beta Backward All

S18 — Backward · RSE-NB

cat("=== Skenario 18: EB Backward RSE-NB ===\n")
## === Skenario 18: EB Backward RSE-NB ===
df_s18 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_jenks),
  aux_fn   = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X29_mean 
##   [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
summary(df_s18[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct        RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.106   Min.   : 22.9   Min.   :20.1  
##  1st Qu.: 6.18   1st Qu.: 6.688   1st Qu.: 38.3   1st Qu.:28.7  
##  Median : 9.32   Median : 9.823   Median : 44.4   Median :31.6  
##  Mean   :10.08   Mean   : 9.486   Mean   : 47.6   Mean   :36.3  
##  3rd Qu.:13.93   3rd Qu.:12.608   3rd Qu.: 53.5   3rd Qu.:40.7  
##  Max.   :24.05   Max.   :18.708   Max.   :101.7   Max.   :87.1
plot_rse(df_s18,"RSE_direct","RSE_eb","Direct","EB-BK RSE-NB","S18: EB Beta-Binomial Backward — RSE NB")
S18: EB Beta Backward RSE Natural Break

S18: EB Beta Backward RSE Natural Break

S19 — Backward · RSE-ES

cat("=== Skenario 19: EB Backward RSE-ES ===\n")
## === Skenario 19: EB Backward RSE-ES ===
df_s19 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_equal),
  aux_fn   = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah 
##   [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45
summary(df_s19[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct        RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.183   Min.   : 22.9   Min.   :20.1  
##  1st Qu.: 6.18   1st Qu.: 6.204   1st Qu.: 38.3   1st Qu.:27.5  
##  Median : 9.32   Median : 8.313   Median : 44.4   Median :33.4  
##  Mean   :10.08   Mean   : 9.469   Mean   : 47.6   Mean   :35.4  
##  3rd Qu.:13.93   3rd Qu.:13.217   3rd Qu.: 53.5   3rd Qu.:39.9  
##  Max.   :24.05   Max.   :19.118   Max.   :101.7   Max.   :77.3
plot_rse(df_s19,"RSE_direct","RSE_eb","Direct","EB-BK RSE-ES","S19: EB Beta-Binomial Backward — RSE ES")
S19: EB Beta Backward RSE Equal Size

S19: EB Beta Backward RSE Equal Size

S20 — Backward · Cluster

cat("=== Skenario 20: EB Backward Cluster ===\n")
## === Skenario 20: EB Backward Cluster ===
df_s20 <- run_on_segments(
  seg_list = split(df_base, df_base$cluster_bk_logit),
  aux_fn   = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ Cluster_1 ] y_logit | n = 10 | Vars: X30 
##   [ Cluster_2 ] y_logit | n = 71 | Vars: X45_mean, X25_jumlah, X33_mean
summary(df_s20[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct         RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.0873   Min.   : 22.9   Min.   :21.4  
##  1st Qu.: 6.18   1st Qu.: 6.5408   1st Qu.: 38.3   1st Qu.:30.4  
##  Median : 9.32   Median : 9.7703   Median : 44.4   Median :33.9  
##  Mean   :10.08   Mean   : 9.4931   Mean   : 47.6   Mean   :36.4  
##  3rd Qu.:13.93   3rd Qu.:12.3782   3rd Qu.: 53.5   3rd Qu.:38.0  
##  Max.   :24.05   Max.   :17.7431   Max.   :101.7   Max.   :95.8
plot_rse(df_s20,"RSE_direct","RSE_eb","Direct","EB-BK Cluster","S20: EB Beta-Binomial Backward — Cluster")
S20: EB Beta Backward Cluster-Aux

S20: EB Beta Backward Cluster-Aux

S21 — Top-n/10 · All

cat("=== Skenario 21: EB TopN All ===\n")
## === Skenario 21: EB TopN All ===
df_s21 <- run_on_segments(
  seg_list = list("All" = df_base),
  aux_fn   = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ All ] y_logit | n = 81 | Vars: X8, X45, X44, X39, X36_mean, X30_mean, X28, X1
summary(df_s21[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct        RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.122   Min.   : 22.9   Min.   :21.2  
##  1st Qu.: 6.18   1st Qu.: 6.516   1st Qu.: 38.3   1st Qu.:31.4  
##  Median : 9.32   Median : 9.938   Median : 44.4   Median :35.2  
##  Mean   :10.08   Mean   : 9.458   Mean   : 47.6   Mean   :37.7  
##  3rd Qu.:13.93   3rd Qu.:12.728   3rd Qu.: 53.5   3rd Qu.:41.1  
##  Max.   :24.05   Max.   :18.843   Max.   :101.7   Max.   :82.7
plot_rse(df_s21,"RSE_direct","RSE_eb","Direct","EB-TN All","S21: EB Beta-Binomial Top-n/10 — All")
S21: EB Beta Top-n/10 All

S21: EB Beta Top-n/10 All

S22 — Top-n/10 · RSE-NB

cat("=== Skenario 22: EB TopN RSE-NB ===\n")
## === Skenario 22: EB TopN RSE-NB ===
df_s22 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_jenks),
  aux_fn   = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X36_mean, X29_mean, X31_mean, X26 
##   [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
summary(df_s22[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct        RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.106   Min.   : 22.9   Min.   :20.2  
##  1st Qu.: 6.18   1st Qu.: 6.688   1st Qu.: 38.3   1st Qu.:28.7  
##  Median : 9.32   Median : 9.829   Median : 44.4   Median :31.5  
##  Mean   :10.08   Mean   : 9.488   Mean   : 47.6   Mean   :36.3  
##  3rd Qu.:13.93   3rd Qu.:12.589   3rd Qu.: 53.5   3rd Qu.:41.2  
##  Max.   :24.05   Max.   :18.584   Max.   :101.7   Max.   :87.1
plot_rse(df_s22,"RSE_direct","RSE_eb","Direct","EB-TN RSE-NB","S22: EB Beta-Binomial Top-n/10 — RSE NB")
S22: EB Beta Top-n/10 RSE Natural Break

S22: EB Beta Top-n/10 RSE Natural Break

S23 — Top-n/10 · RSE-ES

cat("=== Skenario 23: EB TopN RSE-ES ===\n")
## === Skenario 23: EB TopN RSE-ES ===
df_s23 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_equal),
  aux_fn   = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah, X10_jumlah, X8 
##   [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45, X8, X1, X40_jumlah
summary(df_s23[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct        RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.159   Min.   : 22.9   Min.   :19.9  
##  1st Qu.: 6.18   1st Qu.: 6.607   1st Qu.: 38.3   1st Qu.:27.5  
##  Median : 9.32   Median : 8.383   Median : 44.4   Median :32.7  
##  Mean   :10.08   Mean   : 9.541   Mean   : 47.6   Mean   :35.3  
##  3rd Qu.:13.93   3rd Qu.:13.269   3rd Qu.: 53.5   3rd Qu.:40.5  
##  Max.   :24.05   Max.   :19.361   Max.   :101.7   Max.   :76.9
plot_rse(df_s23,"RSE_direct","RSE_eb","Direct","EB-TN RSE-ES","S23: EB Beta-Binomial Top-n/10 — RSE ES")
S23: EB Beta Top-n/10 RSE Equal Size

S23: EB Beta Top-n/10 RSE Equal Size

S24 — Top-n/10 · Cluster

cat("=== Skenario 24: EB TopN Cluster ===\n")
## === Skenario 24: EB TopN Cluster ===
df_s24 <- run_on_segments(
  seg_list = split(df_base, df_base$cluster_tn_logit),
  aux_fn   = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)
##   [ Cluster_1 ] y_logit | n = 67 | Vars: X45, X25_jumlah, X11_mean, X26_mean, X43_mean, X10_jumlah 
##   [ Cluster_2 ] y_logit | n = 14 | Vars: X30
summary(df_s24[, c("Estimasi","y_eb_pct","RSE_direct","RSE_eb")])
##     Estimasi        y_eb_pct         RSE_direct        RSE_eb    
##  Min.   : 0.08   Min.   : 0.0893   Min.   : 22.9   Min.   :20.8  
##  1st Qu.: 6.18   1st Qu.: 5.9527   1st Qu.: 38.3   1st Qu.:30.4  
##  Median : 9.32   Median : 9.6409   Median : 44.4   Median :33.6  
##  Mean   :10.08   Mean   : 9.4755   Mean   : 47.6   Mean   :36.4  
##  3rd Qu.:13.93   3rd Qu.:12.0506   3rd Qu.: 53.5   3rd Qu.:38.4  
##  Max.   :24.05   Max.   :18.3517   Max.   :101.7   Max.   :95.1
plot_rse(df_s24,"RSE_direct","RSE_eb","Direct","EB-TN Cluster","S24: EB Beta-Binomial Top-n/10 — Cluster")
S24: EB Beta Top-n/10 Cluster-Aux

S24: EB Beta Top-n/10 Cluster-Aux


8. HB Beta-Binomial saeHB

Prasyarat JAGS: Skenario 25–32 memerlukan JAGS terinstal di OS dan paket saeHB. Download: https://sourceforge.net/projects/mcmc-jags/files/JAGS/4.x/ lalu restart R. Jika JAGS tidak tersedia, otomatis fallback ke direct estimator. Setiap skenario HB menyertakan plot posterior dengan credible interval 95%.

S25 — Backward · All

cat("=== Skenario 25: HB Backward All ===\n")
## === Skenario 25: HB Backward All ===
df_s25 <- run_on_segments(
  seg_list = list("All" = df_base),
  aux_fn   = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ All ] y_logit | n = 81 | Vars: X8, X45, X44

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -2.3058  0.0150 -2.3363 -2.3162 -2.3073 -2.2950 -2.27
## X8         0.3289  0.0232  0.2803  0.3148  0.3282  0.3448  0.37
## X45       -0.2679  0.0224 -0.3073 -0.2826 -0.2689 -0.2526 -0.22
## X44        0.1447  0.0216  0.1031  0.1297  0.1448  0.1570  0.19
summary(df_s25[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct          RSE            RSE_hb    
##  Min.   : 0.08   Min.   : 2.14   Min.   : 22.9   Min.   :18.7  
##  1st Qu.: 6.18   1st Qu.: 7.67   1st Qu.: 38.3   1st Qu.:22.0  
##  Median : 9.32   Median :10.10   Median : 44.4   Median :24.4  
##  Mean   :10.08   Mean   :10.13   Mean   : 47.6   Mean   :25.1  
##  3rd Qu.:13.93   3rd Qu.:12.70   3rd Qu.: 53.5   3rd Qu.:26.4  
##  Max.   :24.05   Max.   :18.58   Max.   :101.7   Max.   :36.0
plot_rse(df_s25,"RSE","RSE_hb","Direct","HB-BK All","S25: HB Beta Backward — All Domain")
S25: HB Beta Backward RSE

S25: HB Beta Backward RSE

plot_hb_posterior(df_s25, "S25: HB Beta Backward — All Domain")
S25: HB Beta Posterior — All Domain

S25: HB Beta Posterior — All Domain

S26 — Backward · RSE-NB

cat("=== Skenario 26: HB Backward RSE-NB ===\n")
## === Skenario 26: HB Backward RSE-NB ===
df_s26 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_jenks),
  aux_fn   = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X29_mean

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -2.1075  0.0136 -2.1339 -2.1165 -2.1068 -2.0985 -2.08
## X8         0.1669  0.0158  0.1349  0.1570  0.1668  0.1778  0.20
## X29_mean  -0.1367  0.0135 -0.1636 -0.1454 -0.1367 -0.1270 -0.11
##   [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -3.2210  0.0480 -3.3032 -3.2565 -3.2222 -3.1878 -3.12
## X8         0.5560  0.0441  0.4721  0.5255  0.5571  0.5840  0.65
summary(df_s26[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct          RSE            RSE_hb     
##  Min.   : 0.08   Min.   : 1.13   Min.   : 22.9   Min.   : 4.84  
##  1st Qu.: 6.18   1st Qu.: 6.13   1st Qu.: 38.3   1st Qu.: 6.66  
##  Median : 9.32   Median : 9.08   Median : 44.4   Median : 8.38  
##  Mean   :10.08   Mean   :10.09   Mean   : 47.6   Mean   :14.24  
##  3rd Qu.:13.93   3rd Qu.:13.76   3rd Qu.: 53.5   3rd Qu.:12.23  
##  Max.   :24.05   Max.   :23.66   Max.   :101.7   Max.   :45.98
plot_rse(df_s26,"RSE","RSE_hb","Direct","HB-BK RSE-NB","S26: HB Beta Backward — RSE Natural Break")
S26: HB Beta Backward RSE Natural Break

S26: HB Beta Backward RSE Natural Break

plot_hb_posterior(df_s26, "S26: HB Beta Backward — RSE Natural Break", group_col = "grup_jenks")
S26: HB Beta Posterior — RSE Natural Break

S26: HB Beta Posterior — RSE Natural Break

S27 — Backward · RSE-ES

cat("=== Skenario 27: HB Backward RSE-ES ===\n")
## === Skenario 27: HB Backward RSE-ES ===
df_s27 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_equal),
  aux_fn   = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah

##               Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept  -1.9493  0.0151 -1.9770 -1.9602 -1.9491 -1.9393 -1.92
## X19        -0.2239  0.0164 -0.2576 -0.2358 -0.2228 -0.2136 -0.19
## X29_jumlah -0.2052  0.0159 -0.2338 -0.2180 -0.2065 -0.1936 -0.17
##   [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -2.7097  0.0217 -2.7477 -2.7258 -2.7103 -2.6926 -2.67
## X45       -0.4698  0.0237 -0.5167 -0.4853 -0.4692 -0.4553 -0.42
summary(df_s27[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct          RSE            RSE_hb     
##  Min.   : 0.08   Min.   : 1.83   Min.   : 22.9   Min.   : 9.04  
##  1st Qu.: 6.18   1st Qu.: 6.78   1st Qu.: 38.3   1st Qu.:12.42  
##  Median : 9.32   Median : 8.60   Median : 44.4   Median :18.55  
##  Mean   :10.08   Mean   :10.14   Mean   : 47.6   Mean   :17.74  
##  3rd Qu.:13.93   3rd Qu.:13.46   3rd Qu.: 53.5   3rd Qu.:21.18  
##  Max.   :24.05   Max.   :23.01   Max.   :101.7   Max.   :29.50
plot_rse(df_s27,"RSE","RSE_hb","Direct","HB-BK RSE-ES","S27: HB Beta Backward — RSE Equal Size")
S27: HB Beta Backward RSE Equal Size

S27: HB Beta Backward RSE Equal Size

plot_hb_posterior(df_s27, "S27: HB Beta Backward — RSE Equal Size", group_col = "grup_equal")
S27: HB Beta Posterior — RSE Equal Size

S27: HB Beta Posterior — RSE Equal Size

S28 — Backward · Cluster

cat("=== Skenario 28: HB Backward Cluster ===\n")
## === Skenario 28: HB Backward Cluster ===
df_s28 <- run_on_segments(
  seg_list = split(df_base, df_base$cluster_bk_logit),
  aux_fn   = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ Cluster_1 ] y_logit | n = 10 | Vars: X30

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -3.4966  0.0734 -3.6398 -3.5469 -3.4928 -3.4427 -3.38
## X30       -0.6634  0.0785 -0.8073 -0.7180 -0.6647 -0.6077 -0.52
##   [ Cluster_2 ] y_logit | n = 71 | Vars: X45_mean, X25_jumlah, X33_mean

##               Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept  -2.1843  0.0168 -2.2140 -2.1959 -2.1848 -2.1734 -2.15
## X45_mean   -0.1498  0.0187 -0.1859 -0.1624 -0.1504 -0.1352 -0.11
## X25_jumlah -0.1359  0.0174 -0.1668 -0.1479 -0.1361 -0.1237 -0.10
## X33_mean   -0.1479  0.0188 -0.1841 -0.1599 -0.1487 -0.1350 -0.11
summary(df_s28[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct           RSE            RSE_hb    
##  Min.   : 0.08   Min.   : 0.586   Min.   : 22.9   Min.   :16.1  
##  1st Qu.: 6.18   1st Qu.: 7.259   1st Qu.: 38.3   1st Qu.:20.2  
##  Median : 9.32   Median : 9.620   Median : 44.4   Median :22.7  
##  Mean   :10.08   Mean   :10.076   Mean   : 47.6   Mean   :24.4  
##  3rd Qu.:13.93   3rd Qu.:12.465   3rd Qu.: 53.5   3rd Qu.:25.7  
##  Max.   :24.05   Max.   :20.724   Max.   :101.7   Max.   :51.7
plot_rse(df_s28,"RSE","RSE_hb","Direct","HB-BK Cluster","S28: HB Beta Backward — Cluster Aux")
S28: HB Beta Backward Cluster-Aux

S28: HB Beta Backward Cluster-Aux

plot_hb_posterior(df_s28, "S28: HB Beta Backward — Cluster Aux",
                  group_col = "cluster_bk_logit")
S28: HB Beta Posterior — Cluster Aux (A1-Logit)

S28: HB Beta Posterior — Cluster Aux (A1-Logit)

S29 — Top-n/10 · All

cat("=== Skenario 29: HB TopN All ===\n")
## === Skenario 29: HB TopN All ===
df_s29 <- run_on_segments(
  seg_list = list("All" = df_base),
  aux_fn   = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ All ] y_logit | n = 81 | Vars: X8, X45, X44, X39, X36_mean, X30_mean, X28, X1

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -2.3051  0.0157 -2.3378 -2.3154 -2.3036 -2.2944 -2.28
## X8         0.2939  0.0278  0.2439  0.2730  0.2927  0.3142  0.35
## X45       -0.2487  0.0227 -0.2920 -0.2640 -0.2490 -0.2312 -0.21
## X44        0.3194  0.0255  0.2718  0.3009  0.3213  0.3361  0.37
## X39       -0.0337  0.0209 -0.0732 -0.0475 -0.0339 -0.0195  0.01
## X36_mean  -0.1978  0.0288 -0.2518 -0.2167 -0.1961 -0.1771 -0.14
## X30_mean   0.0612  0.0239  0.0154  0.0458  0.0622  0.0773  0.10
## X28        0.1152  0.0194  0.0777  0.1026  0.1151  0.1280  0.15
## X1         0.0240  0.0186 -0.0111  0.0121  0.0233  0.0364  0.06
summary(df_s29[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct          RSE            RSE_hb    
##  Min.   : 0.08   Min.   : 1.73   Min.   : 22.9   Min.   :18.1  
##  1st Qu.: 6.18   1st Qu.: 7.64   1st Qu.: 38.3   1st Qu.:21.7  
##  Median : 9.32   Median : 9.77   Median : 44.4   Median :23.6  
##  Mean   :10.08   Mean   :10.12   Mean   : 47.6   Mean   :24.2  
##  3rd Qu.:13.93   3rd Qu.:12.70   3rd Qu.: 53.5   3rd Qu.:26.1  
##  Max.   :24.05   Max.   :19.53   Max.   :101.7   Max.   :38.6
plot_rse(df_s29,"RSE","RSE_hb","Direct","HB-TN All","S29: HB Beta Top-n/10 — All Domain")
S29: HB Beta Top-n/10 All

S29: HB Beta Top-n/10 All

plot_hb_posterior(df_s29, "S29: HB Beta Top-n/10 — All Domain")
S29: HB Beta Posterior — All Domain

S29: HB Beta Posterior — All Domain

S30 — Top-n/10 · RSE-NB

cat("=== Skenario 30: HB TopN RSE-NB ===\n")
## === Skenario 30: HB TopN RSE-NB ===
df_s30 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_jenks),
  aux_fn   = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X36_mean, X29_mean, X31_mean, X26

##               Mean       SD     2.5%      25%      50%      75% 97.5%
## intercept -2.10307  0.01413 -2.13161 -2.11221 -2.10234 -2.09364 -2.08
## X8         0.16708  0.01846  0.13005  0.15555  0.16729  0.17986  0.20
## X36_mean  -0.00662  0.01820 -0.04286 -0.02015 -0.00676  0.00590  0.03
## X29_mean  -0.11807  0.01812 -0.15289 -0.12884 -0.11754 -0.10645 -0.09
## X31_mean  -0.03707  0.01988 -0.07754 -0.04935 -0.03882 -0.02265  0.00
## X26        0.02059  0.01810 -0.01693  0.00942  0.02151  0.03125  0.06
##   [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -3.2210  0.0480 -3.3032 -3.2565 -3.2222 -3.1878 -3.12
## X8         0.5560  0.0441  0.4721  0.5255  0.5571  0.5840  0.65
summary(df_s30[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct          RSE            RSE_hb     
##  Min.   : 0.08   Min.   : 1.13   Min.   : 22.9   Min.   : 5.88  
##  1st Qu.: 6.18   1st Qu.: 6.13   1st Qu.: 38.3   1st Qu.: 7.60  
##  Median : 9.32   Median : 9.08   Median : 44.4   Median : 9.79  
##  Mean   :10.08   Mean   :10.09   Mean   : 47.6   Mean   :15.03  
##  3rd Qu.:13.93   3rd Qu.:13.74   3rd Qu.: 53.5   3rd Qu.:14.67  
##  Max.   :24.05   Max.   :23.51   Max.   :101.7   Max.   :45.98
plot_rse(df_s30,"RSE","RSE_hb","Direct","HB-TN RSE-NB","S30: HB Beta Top-n/10 — RSE Natural Break")
S30: HB Beta Top-n/10 RSE Natural Break

S30: HB Beta Top-n/10 RSE Natural Break

plot_hb_posterior(df_s30, "S30: HB Beta Top-n/10 — RSE Natural Break", group_col = "grup_jenks")
S30: HB Beta Posterior — RSE Natural Break

S30: HB Beta Posterior — RSE Natural Break

S31 — Top-n/10 · RSE-ES

cat("=== Skenario 31: HB TopN RSE-ES ===\n")
## === Skenario 31: HB TopN RSE-ES ===
df_s31 <- run_on_segments(
  seg_list = split(df_base, df_base$grup_equal),
  aux_fn   = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah, X10_jumlah, X8

##               Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept  -1.9517  0.0170 -1.9838 -1.9630 -1.9516 -1.9408 -1.92
## X19        -0.1901  0.0163 -0.2205 -0.2024 -0.1890 -0.1782 -0.16
## X29_jumlah -0.1243  0.0172 -0.1543 -0.1349 -0.1245 -0.1130 -0.09
## X10_jumlah -0.0765  0.0152 -0.1046 -0.0864 -0.0766 -0.0661 -0.05
## X8          0.0939  0.0157  0.0632  0.0837  0.0943  0.1033  0.13
##   [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45, X8, X1, X40_jumlah

##                Mean       SD     2.5%      25%      50%      75% 97.5%
## intercept  -2.71930  0.01925 -2.75942 -2.73069 -2.71907 -2.70639 -2.68
## X45        -0.42047  0.02988 -0.47661 -0.44089 -0.41907 -0.40245 -0.36
## X8         -0.08794  0.02821 -0.14524 -0.10612 -0.08710 -0.06989 -0.03
## X1         -0.13468  0.02245 -0.17781 -0.15069 -0.13461 -0.11993 -0.09
## X40_jumlah  0.03389  0.02008 -0.00288  0.02102  0.03419  0.04592  0.07
summary(df_s31[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct          RSE            RSE_hb     
##  Min.   : 0.08   Min.   : 2.06   Min.   : 22.9   Min.   : 6.84  
##  1st Qu.: 6.18   1st Qu.: 6.43   1st Qu.: 38.3   1st Qu.: 9.47  
##  Median : 9.32   Median : 8.39   Median : 44.4   Median :17.74  
##  Mean   :10.08   Mean   :10.14   Mean   : 47.6   Mean   :16.48  
##  3rd Qu.:13.93   3rd Qu.:13.51   3rd Qu.: 53.5   3rd Qu.:22.48  
##  Max.   :24.05   Max.   :23.65   Max.   :101.7   Max.   :29.64
plot_rse(df_s31,"RSE","RSE_hb","Direct","HB-TN RSE-ES","S31: HB Beta Top-n/10 — RSE Equal Size")
S31: HB Beta Top-n/10 RSE Equal Size

S31: HB Beta Top-n/10 RSE Equal Size

plot_hb_posterior(df_s31, "S31: HB Beta Top-n/10 — RSE Equal Size", group_col = "grup_equal")
S31: HB Beta Posterior — RSE Equal Size

S31: HB Beta Posterior — RSE Equal Size

S32 — Top-n/10 · Cluster

cat("=== Skenario 32: HB TopN Cluster ===\n")
## === Skenario 32: HB TopN Cluster ===
df_s32 <- run_on_segments(
  seg_list = split(df_base, df_base$cluster_tn_logit),
  aux_fn   = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)
##   [ Cluster_1 ] y_logit | n = 67 | Vars: X45, X25_jumlah, X11_mean, X26_mean, X43_mean, X10_jumlah

##                 Mean        SD      2.5%       25%       50%       75% 97.5%
## intercept  -2.158891  0.016203 -2.189879 -2.169829 -2.158367 -2.149129 -2.12
## X45        -0.099301  0.017205 -0.131962 -0.110902 -0.099404 -0.088501 -0.07
## X25_jumlah -0.101848  0.020448 -0.139426 -0.116646 -0.101861 -0.088094 -0.06
## X11_mean    0.030707  0.014922  0.000532  0.021010  0.030359  0.038723  0.06
## X26_mean   -0.087337  0.016424 -0.123431 -0.098104 -0.086175 -0.075856 -0.06
## X43_mean   -0.061523  0.016941 -0.094442 -0.072310 -0.062899 -0.050619 -0.03
## X10_jumlah -0.100763  0.020370 -0.140027 -0.115026 -0.100517 -0.087260 -0.06
##   [ Cluster_2 ] y_logit | n = 14 | Vars: X30

##              Mean      SD    2.5%     25%     50%     75% 97.5%
## intercept -3.1841  0.0697 -3.3371 -3.2301 -3.1788 -3.1343 -3.07
## X30       -0.4277  0.0703 -0.5566 -0.4743 -0.4268 -0.3808 -0.29
summary(df_s32[, c("Estimasi","y_hb_pct","RSE","RSE_hb")])
##     Estimasi        y_hb_pct          RSE            RSE_hb    
##  Min.   : 0.08   Min.   : 1.03   Min.   : 22.9   Min.   :15.3  
##  1st Qu.: 6.18   1st Qu.: 6.51   1st Qu.: 38.3   1st Qu.:19.2  
##  Median : 9.32   Median : 9.54   Median : 44.4   Median :21.3  
##  Mean   :10.08   Mean   :10.10   Mean   : 47.6   Mean   :24.3  
##  3rd Qu.:13.93   3rd Qu.:13.24   3rd Qu.: 53.5   3rd Qu.:25.2  
##  Max.   :24.05   Max.   :20.74   Max.   :101.7   Max.   :53.1
plot_rse(df_s32,"RSE","RSE_hb","Direct","HB-TN Cluster","S32: HB Beta Top-n/10 — Cluster Aux")
S32: HB Beta Top-n/10 Cluster-Aux

S32: HB Beta Top-n/10 Cluster-Aux

plot_hb_posterior(df_s32, "S32: HB Beta Top-n/10 — Cluster Aux",
                  group_col = "cluster_tn_logit")
S32: HB Beta Posterior — Cluster Aux (A2-Logit)

S32: HB Beta Posterior — Cluster Aux (A2-Logit)


9. Evaluasi Komparatif — 32 Skenario

Kumpulkan Hasil

# Standarisasi: tiap df hasil ditambah y_model & RSE_model
standardize_result <- function(df, y_col, rse_col, skenario_id) {
  if (is.null(df)) return(NULL)
  df %>%
    mutate(
      y_model   = .data[[y_col]],
      RSE_model = .data[[rse_col]],
      Skenario  = skenario_id
    ) %>%
    dplyr::select(Kako, Estimasi, RSE, y_model, RSE_model, Skenario)
}

# Baseline direct
df_direct_eval <- df_base %>%
  mutate(RSE_direct_eval = RSE_direct_col) %>%
  dplyr::select(Kako, Estimasi, RSE, RSE_direct_eval)

# ── Tabel distribusi RSE ─────────────────────────────────────────────────
make_tabel_row <- function(df, rse_col, label) {
  if (is.null(df) || !rse_col %in% names(df)) {
    return(data.frame(Skenario = label, `RSE>=25` = NA, `RSE<25` = NA,
                      `%<25` = NA, `%<15` = NA, `CV Mean` = NA,
                      check.names = FALSE))
  }
  tabel_rse(df, rse_col, label)
}

eval_rows <- list(
  make_tabel_row(df_base,  "RSE_direct_col",  "0. Direct"),
  # EBLUP
  make_tabel_row(df_s1,  "RSE_eblup", "S01 EBLUP-BK All"),
  make_tabel_row(df_s2,  "RSE_eblup", "S02 EBLUP-BK RSE-NB"),
  make_tabel_row(df_s3,  "RSE_eblup", "S03 EBLUP-BK RSE-ES"),
  make_tabel_row(df_s4,  "RSE_eblup", "S04 EBLUP-BK Cluster"),
  make_tabel_row(df_s5,  "RSE_eblup", "S05 EBLUP-TN All"),
  make_tabel_row(df_s6,  "RSE_eblup", "S06 EBLUP-TN RSE-NB"),
  make_tabel_row(df_s7,  "RSE_eblup", "S07 EBLUP-TN RSE-ES"),
  make_tabel_row(df_s8,  "RSE_eblup", "S08 EBLUP-TN Cluster"),
  # GLMM
  make_tabel_row(df_s9,  "RSE_glmm",  "S09 GLMM-BK All"),
  make_tabel_row(df_s10, "RSE_glmm",  "S10 GLMM-BK RSE-NB"),
  make_tabel_row(df_s11, "RSE_glmm",  "S11 GLMM-BK RSE-ES"),
  make_tabel_row(df_s12, "RSE_glmm",  "S12 GLMM-BK Cluster"),
  make_tabel_row(df_s13, "RSE_glmm",  "S13 GLMM-TN All"),
  make_tabel_row(df_s14, "RSE_glmm",  "S14 GLMM-TN RSE-NB"),
  make_tabel_row(df_s15, "RSE_glmm",  "S15 GLMM-TN RSE-ES"),
  make_tabel_row(df_s16, "RSE_glmm",  "S16 GLMM-TN Cluster"),
  # EB Beta
  make_tabel_row(df_s17, "RSE_eb",    "S17 EB-BK All"),
  make_tabel_row(df_s18, "RSE_eb",    "S18 EB-BK RSE-NB"),
  make_tabel_row(df_s19, "RSE_eb",    "S19 EB-BK RSE-ES"),
  make_tabel_row(df_s20, "RSE_eb",    "S20 EB-BK Cluster"),
  make_tabel_row(df_s21, "RSE_eb",    "S21 EB-TN All"),
  make_tabel_row(df_s22, "RSE_eb",    "S22 EB-TN RSE-NB"),
  make_tabel_row(df_s23, "RSE_eb",    "S23 EB-TN RSE-ES"),
  make_tabel_row(df_s24, "RSE_eb",    "S24 EB-TN Cluster"),
  # HB Beta
  make_tabel_row(df_s25, "RSE_hb",    "S25 HB-BK All"),
  make_tabel_row(df_s26, "RSE_hb",    "S26 HB-BK RSE-NB"),
  make_tabel_row(df_s27, "RSE_hb",    "S27 HB-BK RSE-ES"),
  make_tabel_row(df_s28, "RSE_hb",    "S28 HB-BK Cluster"),
  make_tabel_row(df_s29, "RSE_hb",    "S29 HB-TN All"),
  make_tabel_row(df_s30, "RSE_hb",    "S30 HB-TN RSE-NB"),
  make_tabel_row(df_s31, "RSE_hb",    "S31 HB-TN RSE-ES"),
  make_tabel_row(df_s32, "RSE_hb",    "S32 HB-TN Cluster")
)

tabel_eval <- bind_rows(eval_rows)

Tabel Distribusi RSE

kable(tabel_eval,
      caption = "Distribusi RSE — 32 Skenario + Direct",
      col.names = c("Skenario", "RSE≥25", "RSE<25", "% <25%", "% <15%", "CV Mean")) %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, font_size = 12) %>%
  row_spec(1, background = "#fff3cd") %>%
  row_spec(
    which(tabel_eval$`%<25` == max(tabel_eval$`%<25`, na.rm = TRUE))[-1],
    bold = TRUE, background = "#d4edda"
  ) %>%
  pack_rows("EBLUP", 2, 9) %>%
  pack_rows("GLMM", 10, 17) %>%
  pack_rows("EB Beta-Binomial", 18, 25) %>%
  pack_rows("HB Beta-Binomial", 26, 33)
Distribusi RSE — 32 Skenario + Direct
Skenario RSE≥25 RSE<25 % <25% % <15% CV Mean
  1. Direct
80 1 1.2 0.0 47.56
EBLUP
S01 EBLUP-BK All 66 15 18.5 0.0 32.57
S02 EBLUP-BK RSE-NB 26 55 67.9 35.8 27.37
S03 EBLUP-BK RSE-ES 48 33 40.7 0.0 31.20
S04 EBLUP-BK Cluster 66 15 18.5 0.0 33.65
S05 EBLUP-TN All 72 9 11.1 0.0 33.79
S06 EBLUP-TN RSE-NB 27 54 66.7 27.2 28.54
S07 EBLUP-TN RSE-ES 53 28 34.6 0.0 33.04
S08 EBLUP-TN Cluster 75 6 7.4 0.0 35.84
GLMM
S09 GLMM-BK All 1 80 98.8 98.8 2.23
S10 GLMM-BK RSE-NB 2 79 97.5 93.8 3.49
S11 GLMM-BK RSE-ES 1 80 98.8 97.5 2.93
S12 GLMM-BK Cluster 3 78 96.3 93.8 5.83
S13 GLMM-TN All 1 80 98.8 97.5 5.50
S14 GLMM-TN RSE-NB 2 79 97.5 92.6 4.08
S15 GLMM-TN RSE-ES 1 80 98.8 98.8 4.21
S16 GLMM-TN Cluster 4 77 95.1 90.1 7.99
EB Beta-Binomial
S17 EB-BK All 79 2 2.5 0.0 37.68
S18 EB-BK RSE-NB 75 6 7.4 0.0 36.29
S19 EB-BK RSE-ES 71 10 12.3 0.0 35.39
S20 EB-BK Cluster 79 2 2.5 0.0 36.36
S21 EB-TN All 79 2 2.5 0.0 37.66
S22 EB-TN RSE-NB 75 6 7.4 0.0 36.29
S23 EB-TN RSE-ES 71 10 12.3 0.0 35.33
S24 EB-TN Cluster 79 2 2.5 0.0 36.38
HB Beta-Binomial
S25 HB-BK All 32 49 60.5 0.0 25.06
S26 HB-BK RSE-NB 19 62 76.5 76.5 14.24
S27 HB-BK RSE-ES 9 72 88.9 38.3 17.74
S28 HB-BK Cluster 23 58 71.6 0.0 24.45
S29 HB-TN All 28 53 65.4 0.0 24.21
S30 HB-TN RSE-NB 19 62 76.5 75.3 15.03
S31 HB-TN RSE-ES 9 72 88.9 46.9 16.48
S32 HB-TN Cluster 22 59 72.8 0.0 24.28

Metrik Akurasi (RB & RMSE)

get_est <- function(df, col) if (!is.null(df) && col %in% names(df)) df[[col]] else NULL

metrik_rows <- list(
  metrik(get_est(df_s1,  "y_eblup"),   df_s1$Estimasi,   "S01 EBLUP-BK All"),
  metrik(get_est(df_s2,  "y_eblup"),   df_s2$Estimasi,   "S02 EBLUP-BK RSE-NB"),
  metrik(get_est(df_s3,  "y_eblup"),   df_s3$Estimasi,   "S03 EBLUP-BK RSE-ES"),
  metrik(get_est(df_s4,  "y_eblup"),   df_s4$Estimasi,   "S04 EBLUP-BK Cluster"),
  metrik(get_est(df_s5,  "y_eblup"),   df_s5$Estimasi,   "S05 EBLUP-TN All"),
  metrik(get_est(df_s6,  "y_eblup"),   df_s6$Estimasi,   "S06 EBLUP-TN RSE-NB"),
  metrik(get_est(df_s7,  "y_eblup"),   df_s7$Estimasi,   "S07 EBLUP-TN RSE-ES"),
  metrik(get_est(df_s8,  "y_eblup"),   df_s8$Estimasi,   "S08 EBLUP-TN Cluster"),
  metrik(get_est(df_s9,  "y_glmm"),    df_s9$Estimasi,   "S09 GLMM-BK All"),
  metrik(get_est(df_s10, "y_glmm"),    df_s10$Estimasi,  "S10 GLMM-BK RSE-NB"),
  metrik(get_est(df_s11, "y_glmm"),    df_s11$Estimasi,  "S11 GLMM-BK RSE-ES"),
  metrik(get_est(df_s12, "y_glmm"),    df_s12$Estimasi,  "S12 GLMM-BK Cluster"),
  metrik(get_est(df_s13, "y_glmm"),    df_s13$Estimasi,  "S13 GLMM-TN All"),
  metrik(get_est(df_s14, "y_glmm"),    df_s14$Estimasi,  "S14 GLMM-TN RSE-NB"),
  metrik(get_est(df_s15, "y_glmm"),    df_s15$Estimasi,  "S15 GLMM-TN RSE-ES"),
  metrik(get_est(df_s16, "y_glmm"),    df_s16$Estimasi,  "S16 GLMM-TN Cluster"),
  metrik(get_est(df_s17, "y_eb_pct"),  df_s17$Estimasi,  "S17 EB-BK All"),
  metrik(get_est(df_s18, "y_eb_pct"),  df_s18$Estimasi,  "S18 EB-BK RSE-NB"),
  metrik(get_est(df_s19, "y_eb_pct"),  df_s19$Estimasi,  "S19 EB-BK RSE-ES"),
  metrik(get_est(df_s20, "y_eb_pct"),  df_s20$Estimasi,  "S20 EB-BK Cluster"),
  metrik(get_est(df_s21, "y_eb_pct"),  df_s21$Estimasi,  "S21 EB-TN All"),
  metrik(get_est(df_s22, "y_eb_pct"),  df_s22$Estimasi,  "S22 EB-TN RSE-NB"),
  metrik(get_est(df_s23, "y_eb_pct"),  df_s23$Estimasi,  "S23 EB-TN RSE-ES"),
  metrik(get_est(df_s24, "y_eb_pct"),  df_s24$Estimasi,  "S24 EB-TN Cluster"),
  metrik(get_est(df_s25, "y_hb_pct"),  df_s25$Estimasi,  "S25 HB-BK All"),
  metrik(get_est(df_s26, "y_hb_pct"),  df_s26$Estimasi,  "S26 HB-BK RSE-NB"),
  metrik(get_est(df_s27, "y_hb_pct"),  df_s27$Estimasi,  "S27 HB-BK RSE-ES"),
  metrik(get_est(df_s28, "y_hb_pct"),  df_s28$Estimasi,  "S28 HB-BK Cluster"),
  metrik(get_est(df_s29, "y_hb_pct"),  df_s29$Estimasi,  "S29 HB-TN All"),
  metrik(get_est(df_s30, "y_hb_pct"),  df_s30$Estimasi,  "S30 HB-TN RSE-NB"),
  metrik(get_est(df_s31, "y_hb_pct"),  df_s31$Estimasi,  "S31 HB-TN RSE-ES"),
  metrik(get_est(df_s32, "y_hb_pct"),  df_s32$Estimasi,  "S32 HB-TN Cluster")
)
tabel_metrik <- bind_rows(Filter(Negate(is.null), metrik_rows))

kable(tabel_metrik,
      caption = "Relative Bias (%) & RMSE terhadap Direct Estimator") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
                full_width = FALSE, font_size = 12) %>%
  row_spec(which(abs(tabel_metrik$`RB (%)`) == min(abs(tabel_metrik$`RB (%)`), na.rm=TRUE)),
           bold = TRUE, background = "#d4edda")
Relative Bias (%) & RMSE terhadap Direct Estimator
Skenario RB (%) RMSE
S01 EBLUP-BK All -6.336 3.8559
S02 EBLUP-BK RSE-NB -11.100 4.1048
S03 EBLUP-BK RSE-ES -9.116 3.2683
S04 EBLUP-BK Cluster -6.150 3.6493
S05 EBLUP-TN All -5.870 3.7863
S06 EBLUP-TN RSE-NB -10.974 4.0329
S07 EBLUP-TN RSE-ES -8.461 3.1051
S08 EBLUP-TN Cluster -6.284 3.4471
S09 GLMM-BK All 11.118 2.4846
S10 GLMM-BK RSE-NB 4.191 3.6762
S11 GLMM-BK RSE-ES 6.220 2.8721
S12 GLMM-BK Cluster 9.401 2.9225
S13 GLMM-TN All 11.320 2.5729
S14 GLMM-TN RSE-NB 4.400 3.7664
S15 GLMM-TN RSE-ES 7.038 3.0583
S16 GLMM-TN Cluster 10.842 3.2166
S17 EB-BK All 2.070 1.9740
S18 EB-BK RSE-NB 0.581 2.0928
S19 EB-BK RSE-ES 0.754 1.7348
S20 EB-BK Cluster 2.910 2.0358
S21 EB-TN All 2.162 1.9689
S22 EB-TN RSE-NB 0.586 2.0865
S23 EB-TN RSE-ES 0.903 1.7279
S24 EB-TN Cluster 3.135 2.1364
S25 HB-BK All 64.926 2.3195
S26 HB-BK RSE-NB 23.497 0.6285
S27 HB-BK RSE-ES 71.816 1.4940
S28 HB-BK Cluster 24.821 1.7359
S29 HB-TN All 81.880 2.3613
S30 HB-TN RSE-NB 23.608 0.6426
S31 HB-TN RSE-ES 72.073 1.3614
S32 HB-TN Cluster 32.512 1.5165

Barplot % Domain RSE < 25%

direct_pct <- pct_ok(df_base$RSE_direct_col)

plot_df <- tabel_eval %>%
  filter(Skenario != "0. Direct") %>%
  mutate(
    pct_25    = `% < 25%`,                        # << nama persis dari debug
    Model     = case_when(
      grepl("EBLUP", Skenario)        ~ "EBLUP",
      grepl("GLMM",  Skenario)        ~ "GLMM",
      grepl("EB-BK|EB-TN", Skenario)  ~ "EB Beta",
      grepl("HB-BK|HB-TN", Skenario)  ~ "HB Beta",
      TRUE                             ~ "Lain"
    ),
    AuxMetode = ifelse(grepl("-BK", Skenario), "Backward", "Top-n/10"),
    Skenario  = factor(Skenario, levels = rev(unique(Skenario)))
  )

ggplot(plot_df, aes(x = Skenario, y = pct_25, fill = Model, alpha = AuxMetode)) +
  geom_col(width = 0.72) +
  geom_text(aes(label = paste0(pct_25, "%")),
            hjust = -0.12, size = 3, fontface = "bold") +
  geom_vline(xintercept = direct_pct,
             linetype = "dashed", color = "#c0392b", linewidth = 0.9) +
  scale_fill_manual(values = c(
    "EBLUP"   = "#2980b9", "GLMM"    = "#8e44ad",
    "EB Beta" = "#e67e22", "HB Beta" = "#27ae60"
  )) +
  scale_alpha_manual(values = c("Backward" = 1, "Top-n/10" = 0.65)) +
  scale_y_continuous(limits = c(0, 115), expand = c(0, 0)) +
  coord_flip() +
  labs(title    = "% Domain RSE < 25% — 32 Skenario SAE",
       subtitle = "Garis merah = baseline Direct | Gelap = Backward, Terang = Top-n/10",
       x = NULL, y = "% Domain RSE < 25%", fill = "Model", alpha = "Seleksi Aux") +
  theme_minimal(base_size = 11) +
  theme(legend.position = "top", plot.title = element_text(face = "bold", size = 13),
        panel.grid.major.y = element_blank())

Scatter RSE Direct vs Model (Facet 32)

make_scatter_df <- function(df, y_col, rse_col, label) {
  if (is.null(df) || !all(c(y_col, rse_col, "RSE_direct") %in% names(df))) return(NULL)
  df %>%
    mutate(RSE_model = .data[[rse_col]], Skenario = label) %>%
    dplyr::select(Kako, RSE_direct, RSE_model, Skenario)   # << RSE_direct bukan RSE_direct_col
}

scatter_list <- list(
  make_scatter_df(df_s1,  "y_eblup",  "RSE_eblup", "S01 EBLUP-BK All"),
  make_scatter_df(df_s2,  "y_eblup",  "RSE_eblup", "S02 EBLUP-BK RSE-NB"),
  make_scatter_df(df_s3,  "y_eblup",  "RSE_eblup", "S03 EBLUP-BK RSE-ES"),
  make_scatter_df(df_s4,  "y_eblup",  "RSE_eblup", "S04 EBLUP-BK Clust"),
  make_scatter_df(df_s5,  "y_eblup",  "RSE_eblup", "S05 EBLUP-TN All"),
  make_scatter_df(df_s6,  "y_eblup",  "RSE_eblup", "S06 EBLUP-TN RSE-NB"),
  make_scatter_df(df_s7,  "y_eblup",  "RSE_eblup", "S07 EBLUP-TN RSE-ES"),
  make_scatter_df(df_s8,  "y_eblup",  "RSE_eblup", "S08 EBLUP-TN Clust"),
  make_scatter_df(df_s9,  "y_glmm",   "RSE_glmm",  "S09 GLMM-BK All"),
  make_scatter_df(df_s10, "y_glmm",   "RSE_glmm",  "S10 GLMM-BK RSE-NB"),
  make_scatter_df(df_s11, "y_glmm",   "RSE_glmm",  "S11 GLMM-BK RSE-ES"),
  make_scatter_df(df_s12, "y_glmm",   "RSE_glmm",  "S12 GLMM-BK Clust"),
  make_scatter_df(df_s13, "y_glmm",   "RSE_glmm",  "S13 GLMM-TN All"),
  make_scatter_df(df_s14, "y_glmm",   "RSE_glmm",  "S14 GLMM-TN RSE-NB"),
  make_scatter_df(df_s15, "y_glmm",   "RSE_glmm",  "S15 GLMM-TN RSE-ES"),
  make_scatter_df(df_s16, "y_glmm",   "RSE_glmm",  "S16 GLMM-TN Clust"),
  make_scatter_df(df_s17, "y_eb_pct", "RSE_eb",    "S17 EB-BK All"),
  make_scatter_df(df_s18, "y_eb_pct", "RSE_eb",    "S18 EB-BK RSE-NB"),
  make_scatter_df(df_s19, "y_eb_pct", "RSE_eb",    "S19 EB-BK RSE-ES"),
  make_scatter_df(df_s20, "y_eb_pct", "RSE_eb",    "S20 EB-BK Clust"),
  make_scatter_df(df_s21, "y_eb_pct", "RSE_eb",    "S21 EB-TN All"),
  make_scatter_df(df_s22, "y_eb_pct", "RSE_eb",    "S22 EB-TN RSE-NB"),
  make_scatter_df(df_s23, "y_eb_pct", "RSE_eb",    "S23 EB-TN RSE-ES"),
  make_scatter_df(df_s24, "y_eb_pct", "RSE_eb",    "S24 EB-TN Clust"),
  make_scatter_df(df_s25, "y_hb_pct", "RSE_hb",    "S25 HB-BK All"),
  make_scatter_df(df_s26, "y_hb_pct", "RSE_hb",    "S26 HB-BK RSE-NB"),
  make_scatter_df(df_s27, "y_hb_pct", "RSE_hb",    "S27 HB-BK RSE-ES"),
  make_scatter_df(df_s28, "y_hb_pct", "RSE_hb",    "S28 HB-BK Clust"),
  make_scatter_df(df_s29, "y_hb_pct", "RSE_hb",    "S29 HB-TN All"),
  make_scatter_df(df_s30, "y_hb_pct", "RSE_hb",    "S30 HB-TN RSE-NB"),
  make_scatter_df(df_s31, "y_hb_pct", "RSE_hb",    "S31 HB-TN RSE-ES"),
  make_scatter_df(df_s32, "y_hb_pct", "RSE_hb",    "S32 HB-TN Clust")
)

df_scatter <- bind_rows(Filter(Negate(is.null), scatter_list)) %>%
  filter(!is.na(RSE_direct) & !is.na(RSE_model))

lim_max <- quantile(c(df_scatter$RSE_direct, df_scatter$RSE_model), 0.99, na.rm = TRUE)

ggplot(df_scatter, aes(x = RSE_direct, y = RSE_model)) +
  geom_point(alpha = 0.5, size = 1.3, color = "#2980b9") +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "#c0392b") +
  geom_hline(yintercept = 25, linetype = "dotted", color = "#e67e22") +
  geom_vline(xintercept = 25, linetype = "dotted", color = "#e67e22") +
  facet_wrap(~ Skenario, ncol = 4, scales = "free") +
  coord_cartesian(xlim = c(0, lim_max), ylim = c(0, lim_max)) +
  labs(title    = "RSE Direct vs RSE Model — 32 Skenario SAE",
       subtitle = "Di bawah diagonal merah = lebih presisi dari direct",
       x = "RSE Direct (%)", y = "RSE Model (%)") +
  theme_minimal(base_size = 9) +
  theme(strip.text = element_text(face = "bold", size = 7),
        plot.title = element_text(face = "bold"))

Heatmap Ringkasan

heat_df <- tabel_eval %>%
  filter(Skenario != "0. Direct") %>%
  mutate(
    pct_25   = `% < 25%`,                        # << nama persis
    Model    = case_when(
      grepl("EBLUP", Skenario)        ~ "EBLUP",
      grepl("GLMM",  Skenario)        ~ "GLMM",
      grepl("EB-BK|EB-TN", Skenario)  ~ "EB-Beta",
      TRUE                             ~ "HB-Beta"
    ),
    AuxMetode = ifelse(grepl("-BK", Skenario), "Backward", "Top-n/10"),
    Partisi   = case_when(
      grepl("All",    Skenario) ~ "All",
      grepl("RSE-NB", Skenario) ~ "RSE-NB",
      grepl("RSE-ES", Skenario) ~ "RSE-ES",
      TRUE                      ~ "Cluster"
    ),
    Partisi   = factor(Partisi,   levels = c("All","RSE-NB","RSE-ES","Cluster")),
    Model     = factor(Model,     levels = c("EBLUP","GLMM","EB-Beta","HB-Beta")),
    AuxMetode = factor(AuxMetode, levels = c("Backward","Top-n/10"))
  )

ggplot(heat_df, aes(x = Partisi, y = Model, fill = pct_25)) +
  geom_tile(color = "white", linewidth = 0.8) +
  geom_text(aes(label = paste0(pct_25, "%")), size = 4, fontface = "bold") +
  facet_wrap(~ AuxMetode, ncol = 2) +
  scale_fill_gradient2(low = "#e74c3c", mid = "#f39c12", high = "#27ae60",
                       midpoint = 60, name = "% Domain\nRSE < 25%") +
  labs(title = "Heatmap Performa — % Domain RSE < 25%",
       subtitle = "Kiri = Backward | Kanan = Top-n/10",
       x = "Partisi", y = "Model") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold"),
        strip.text = element_text(face = "bold", size = 12),
        axis.text  = element_text(size = 11))


10. Ringkasan & Kesimpulan

best_rse <- tabel_eval %>%
  filter(Skenario != "0. Direct") %>%
  mutate(pct_25 = `% < 25%`) %>%            # << tarik dulu
  slice_max(pct_25, n = 3, with_ties = FALSE)

best_rb <- tabel_metrik %>%
  slice_min(abs(`RB (%)`), n = 3, with_ties = FALSE)

cat("=== Top 3 Model (% domain RSE < 25%) ===\n")
## === Top 3 Model (% domain RSE < 25%) ===
print(best_rse[, c("Skenario", "pct_25", "% < 15%", "CV Mean")])
##             Skenario pct_25 % < 15% CV Mean
## 1    S09 GLMM-BK All   98.8    98.8    2.23
## 2 S11 GLMM-BK RSE-ES   98.8    97.5    2.93
## 3    S13 GLMM-TN All   98.8    97.5    5.50
cat("\n=== Top 3 Model (Relative Bias terkecil) ===\n")
## 
## === Top 3 Model (Relative Bias terkecil) ===
print(best_rb[, c("Skenario", "RB (%)", "RMSE")])
##           Skenario RB (%)  RMSE
## 1 S18 EB-BK RSE-NB  0.581 2.093
## 2 S22 EB-TN RSE-NB  0.586 2.087
## 3 S19 EB-BK RSE-ES  0.754 1.735

Panduan interpretasi 32 skenario:

  1. Seleksi Aux (Backward vs Top-n/10): Backward lebih selektif (stepAIC membuang variabel non-signifikan); Top-n/10 mempertahankan variabel dengan korelasi tertinggi. Keduanya menggunakan floor(n_partisi/10) variabel awal dari pre-filter korelasi.

  2. EBLUP (Normal): Baseline kuat untuk domain tidak terlalu ekstrim proporsinya. Rentan estimasi negatif jika proporsi mendekati 0.

  3. GLMM (Logit, glmmTMB): Menangani bounded response [0,1] lebih proper. Random intercept per domain memberi shrinkage empiris. MSE dari Jackknife leave-one-domain-out.

  4. EB Beta-Binomial: Analytical — cepat dan tidak perlu MCMC. Prior mean domain-spesifik dari regresi logit. Posterior variance analytik dari distribusi Beta konjugat.

  5. HB Beta (saeHB/JAGS): Paling proper secara Bayesian. Memanfaatkan full posterior distribution. Plot posterior dengan 95% CI memperlihatkan ketidakpastian per domain.

  6. Partisi RSE-NB vs RSE-ES: Natural Break (Jenks) memisahkan berdasarkan gap alami distribusi RSE; Equal Size memastikan kedua grup seimbang. RSE-NB lebih heterogen antar grup, RSE-ES lebih stabil dalam estimasi.

  7. Cluster-Aux: Variabel clustering berbeda per kombinasi (Aux method, Link function), membuat cluster lebih relevan secara statistik untuk masing-masing model.


Dianalisis dengan R. Seluruh 32 skenario merupakan kombinasi dari 2 metode seleksi aux × 4 model × 4 partisi.