Ringkasan analisis: 32 skenario SAE — 2 metode seleksi aux (Backward vs Top-n/10) × 4 model (EBLUP, GLMM-Logit, EB Beta-Binomial, HB Beta) × 4 partisi (All, RSE Natural Break, RSE Equal Size, Cluster-Aux). Baseline: Direct Estimator.
# Load urutan: MASS & car DULU, baru dplyr (cegah masking select/recode)
library(MASS)
library(car)
library(readxl)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(patchwork)
library(sae) # mseFH
library(glmmTMB) # GLMM binomial
library(classInt) # Jenks Natural Breaks
library(factoextra)
library(knitr)
library(kableExtra)
# Ikat namespace kritis
select <- dplyr::select
recode <- dplyr::recodehas_jags <- tryCatch({
library(rjags); library(saeHB); TRUE
}, error = function(e) {
message("saeHB/JAGS tidak tersedia — skenario HB Beta akan fallback ke direct.")
FALSE
})
cat("Status JAGS/saeHB:", ifelse(has_jags, "Tersedia", "Tidak tersedia"), "\n")## Status JAGS/saeHB: Tersedia
Estimasi_531 <- read_excel("C:/Users/User/Downloads/Hasil_sulawesi_KabKota.xlsx")
Podes_kab2024 <- read_excel("C:/Users/User/Downloads/Podes_kab_lengkap.xlsx")
Data_Penduduk <- read_excel("C:/Users/User/Downloads/Data_Penduduk.xlsx")
Estimasi_531 <- Estimasi_531 %>% mutate(Kako = sprintf("%04d", Kako))
Podes_kab2024 <- Podes_kab2024 %>% mutate(kode_kab = sprintf("%04s", kode_kab))
df_gabungan <- Estimasi_531 %>%
left_join(Podes_kab2024, by = c("Kako" = "kode_kab")) %>%
dplyr::select(Kako, Estimasi, RSE, starts_with("X"))
df_gabungan$Kako <- as.numeric(df_gabungan$Kako)
df_final <- df_gabungan %>% left_join(Data_Penduduk, by = "Kako")
cat("Jumlah domain:", nrow(df_final), "\n")## Jumlah domain: 81
## Jumlah variabel: 112
df_final <- df_final %>%
mutate(
X6 = X6_jumlah / Jumlah_Penduduk * 1000,
X7 = X7_jumlah / Jumlah_Penduduk * 1000,
X10 = X10_jumlah / Jumlah_Penduduk * 1000,
X33 = X33_jumlah / Jumlah_Penduduk * 1000,
X11 = X11_jumlah / Jumlah_P * 1000,
X12 = X12_jumlah / Jumlah_P * 1000,
X27 = X27_jumlah / Jumlah_P * 1000,
X28 = X28_jumlah / Jumlah_P * 1000,
X29 = X29_jumlah / Jumlah_P * 1000,
X30 = X30_jumlah / Jumlah_P * 1000,
X31 = X31_jumlah / Jumlah_P * 1000,
X35 = X35_jumlah / Jumlah_P * 1000,
X36 = X36_jumlah / Jumlah_P * 1000
)eps_logit <- 1e-4
df_base <- df_final %>%
mutate(
vardir = (RSE / 100 * Estimasi)^2,
y_prop = pmax(pmin(Estimasi / 100, 1 - eps_logit), eps_logit),
y_logit = log(y_prop / (1 - y_prop))
) %>%
as.data.frame()
# ── Kandidat aux: HANYA kolom yang diawali "X" ───────────────────────────
# RSE_direct_col, grup_jenks, cluster_*, dll yang ditambahkan kemudian
# otomatis TIDAK masuk karena tidak berawalan X
kandidat_vars <- names(df_base)[startsWith(names(df_base), "X")]
# ── excl_always: otomatis semua kolom SELAIN X* ──────────────────────────
# Diupdate di setiap fungsi seleksi agar mencakup kolom baru yang
# ditambahkan setelah chunk ini (RSE_direct_col, grup_*, cluster_*)
excl_always <- setdiff(names(df_base), kandidat_vars)
cat("Variabel kandidat (", length(kandidat_vars), "):\n",
paste(kandidat_vars, collapse = ", "), "\n")## Variabel kandidat ( 119 ):
## X1, X2, X8, X9, X13, X14, X15, X16, X17, X18, X19, X21, X22, X23, X24, X25, X26, X32, X37, X38, X39, X40, X41, X43, X44, X45, X1_jumlah, X1_mean, X2_jumlah, X2_mean, X8_jumlah, X8_mean, X9_jumlah, X9_mean, X13_jumlah, X13_mean, X14_jumlah, X14_mean, X15_jumlah, X15_mean, X16_jumlah, X16_mean, X17_jumlah, X17_mean, X18_jumlah, X18_mean, X19_jumlah, X19_mean, X21_jumlah, X21_mean, X22_jumlah, X22_mean, X23_jumlah, X23_mean, X24_jumlah, X24_mean, X25_jumlah, X25_mean, X26_jumlah, X26_mean, X32_jumlah, X32_mean, X34_jumlah, X34_mean, X37_jumlah, X37_mean, X38_jumlah, X38_mean, X39_jumlah, X39_mean, X40_jumlah, X40_mean, X41_jumlah, X41_mean, X43_jumlah, X43_mean, X44_jumlah, X44_mean, X45_jumlah, X45_mean, X6_jumlah, X6_mean, X7_jumlah, X7_mean, X10_jumlah, X10_mean, X11_jumlah, X11_mean, X12_jumlah, X12_mean, X27_jumlah, X27_mean, X28_jumlah, X28_mean, X29_jumlah, X29_mean, X30_jumlah, X30_mean, X31_jumlah, X31_mean, X33_jumlah, X33_mean, X35_jumlah, X35_mean, X36_jumlah, X36_mean, X6, X7, X10, X33, X11, X12, X27, X28, X29, X30, X31, X35, X36
##
## Total dikecualikan: 9 kolom
# Helper: pilih 1 representatif terbaik per grup X (X19, X19_mean, X19_jumlah → 1)
# Pemilihan berdasarkan |cor| tertinggi ke y_col pada data segmen berjalan
pilih_per_grup <- function(df, kandidat, y_col) {
get_base <- function(x) sub("(_jumlah|_mean)$", "", x)
bases <- unique(sapply(kandidat, get_base))
terpilih <- c()
for (b in bases) {
anggota <- kandidat[get_base(kandidat) == b]
if (length(anggota) == 1) { terpilih <- c(terpilih, anggota); next }
cor_v <- sapply(anggota, function(v)
abs(cor(df[[v]], df[[y_col]], use = "complete.obs")))
cor_v <- cor_v[!is.na(cor_v)]
if (length(cor_v) == 0) { terpilih <- c(terpilih, anggota[1]); next }
terpilih <- c(terpilih, names(which.max(cor_v)))
}
unique(terpilih)
}
seleksi_backward <- function(df, y_col, excl_cols = NULL, r2_cap = 0.8) {
if (is.null(excl_cols)) excl_cols <- names(df)[!startsWith(names(df), "X")]
n_domain <- nrow(df)
max_top <- max(1L, floor(n_domain / 10L))
keep <- sapply(df, function(x) length(unique(na.omit(x))) > 1)
df <- df[, keep]
kandidat <- setdiff(names(df), excl_cols)
kandidat <- kandidat[startsWith(kandidat, "X")]
if (length(kandidat) == 0) return(character(0))
# Pilih 1 representatif per grup X (sesuai y_col & data segmen ini)
kandidat <- pilih_per_grup(df, kandidat, y_col)
if (length(kandidat) == 0) return(character(0))
cor_v <- sapply(df[kandidat], function(x)
abs(cor(x, df[[y_col]], use = "complete.obs")))
cor_v <- cor_v[!is.na(cor_v)]
if (length(cor_v) == 0) return(character(0))
vars <- names(sort(cor_v, decreasing = TRUE))[1:min(max_top, length(cor_v))]
vars <- vars[!is.na(vars)]
step_m <- try(stepAIC(
lm(as.formula(paste(y_col, "~", paste(vars, collapse = "+"))), data = df),
direction = "backward", trace = FALSE), silent = TRUE)
if (!inherits(step_m, "try-error")) {
sv <- names(coef(step_m))[-1]
if (length(sv) > 0) vars <- sv
}
repeat {
if (length(vars) <= 1) break
m <- try(lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df),silent=TRUE)
if (inherits(m,"try-error")) { vars <- vars[1]; break }
v <- try(vif(m), silent=TRUE)
if (inherits(v,"try-error")) break
v_ok <- v[!is.na(v)]
if (length(v_ok)==0 || all(v_ok<=10)) break
vars <- vars[vars != names(which.max(v_ok))]
}
if (length(vars) > 1) {
m <- lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df)
if (summary(m)$r.squared > r2_cap) vars <- vars[1]
}
return(vars)
}seleksi_topn <- function(df, y_col, excl_cols = NULL, r2_cap = 0.8) {
if (is.null(excl_cols)) excl_cols <- names(df)[!startsWith(names(df), "X")]
n_domain <- nrow(df)
max_top <- min(max(1L, floor(n_domain / 10L)), 8L)
keep <- sapply(df, function(x) length(unique(na.omit(x))) > 1)
df <- df[, keep]
kandidat <- setdiff(names(df), excl_cols)
kandidat <- kandidat[startsWith(kandidat, "X")]
if (length(kandidat) == 0) return(character(0))
# Pilih 1 representatif per grup X
kandidat <- pilih_per_grup(df, kandidat, y_col)
if (length(kandidat) == 0) return(character(0))
cor_v <- sapply(df[kandidat], function(x)
abs(cor(x, df[[y_col]], use = "complete.obs")))
cor_v <- cor_v[!is.na(cor_v)]
if (length(cor_v) == 0) return(character(0))
vars <- names(sort(cor_v, decreasing = TRUE))[1:min(max_top, length(cor_v))]
vars <- vars[!is.na(vars)]
repeat {
if (length(vars) <= 1) break
m <- try(lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df),silent=TRUE)
if (inherits(m,"try-error")) { vars <- vars[1]; break }
v <- try(vif(m), silent=TRUE)
if (inherits(v,"try-error")) break
v_ok <- v[!is.na(v)]
if (length(v_ok)==0 || all(v_ok<=10)) break
vars <- vars[vars != names(which.max(v_ok))]
}
if (length(vars) > 1) {
m <- lm(as.formula(paste(y_col,"~",paste(vars,collapse="+"))),data=df)
if (summary(m)$r.squared > r2_cap) vars <- vars[1]
}
return(vars)
}run_eblup <- function(df, vars) {
df$vardir <- pmax(df$vardir, 1e-10)
# Trim ke kolom yang dibutuhkan mseFH saja (seperti pola SAE_Lengkap_Fixed2)
# Kolom ekstra (grup_jenks, cluster_*, RSE_direct_col, dll) dikeluarkan
# agar mseFH tidak terganggu
df_m <- df[, c("Estimasi", "vardir", vars), drop = FALSE]
# Scale per kolom — lebih aman dari scale() bulk untuk kolom near-constant
for (v in vars) {
mu <- mean(df_m[[v]], na.rm = TRUE)
sd <- sd(df_m[[v]], na.rm = TRUE)
df_m[[v]] <- if (is.na(sd) || sd == 0) 0 else (df_m[[v]] - mu) / sd
df_m[[v]][is.na(df_m[[v]]) | is.nan(df_m[[v]])] <- 0
}
form <- as.formula(paste("Estimasi ~", paste(vars, collapse = "+")))
m <- try(mseFH(form, vardir = vardir, data = df_m), silent = TRUE)
if (inherits(m, "try-error") || is.null(m$est) || is.null(m$mse)) {
df$y_eblup <- df$Estimasi
df$mse <- df$vardir
} else {
ev <- as.numeric(m$est$eblup)
mv <- as.numeric(m$mse)
# Per-domain fallback: domain NaN/NA → pakai direct, bukan fallback seluruh segmen
bad <- is.na(ev) | is.nan(ev) | is.na(mv) | is.nan(mv)
if (any(bad)) {
message(" EBLUP: ", sum(bad), " domain NaN → fallback direct per domain")
ev[bad] <- df$Estimasi[bad]
mv[bad] <- df$vardir[bad]
}
df$y_eblup <- ev
df$mse <- mv
}
df$mse <- pmax(df$mse, 0)
df$RSE_direct <- sqrt(df$vardir) / pmax(df$Estimasi, 1e-6) * 100
df$RMSE_eblup <- sqrt(df$mse)
df$RSE_eblup <- df$RMSE_eblup / pmax(abs(df$y_eblup), 1e-6) * 100
df
}run_glmm <- function(df, vars) {
eps <- 1e-4
df$y_prop <- pmax(pmin(df$Estimasi / 100, 1 - eps), eps)
se_prop <- pmax(df$RSE / 100 * df$y_prop, eps)
df$n_eff <- pmax(as.integer(round(df$y_prop * (1 - df$y_prop) / se_prop^2)), 2L)
df$y_count <- pmin(as.integer(round(df$y_prop * df$n_eff)), df$n_eff)
df[vars] <- as.data.frame(scale(df[vars]))
df[vars][is.nan(as.matrix(df[vars]))] <- 0
form <- as.formula(paste(
"cbind(y_count, n_eff - y_count) ~",
paste(vars, collapse = " + "),
"+ (1 | Kako)"
))
ctrl <- glmmTMBControl(optCtrl = list(iter.max = 400, eval.max = 600))
m <- try(glmmTMB(form, data = df, family = binomial, control = ctrl),
silent = TRUE)
if (inherits(m, "try-error") || is.null(m)) {
message(" GLMM fit gagal → fallback direct")
df$y_glmm <- df$Estimasi
df$mse_glmm <- df$vardir
} else {
df$y_glmm <- plogis(predict(m, type = "link", re.form = NULL)) * 100
# Jackknife — seluruh loop dibungkus tryCatch
# Jika salah satu iterasi fatal crash, fallback ke MSE empiris
mse_v <- tryCatch({
n_d <- nrow(df)
jack_mat <- matrix(NA_real_, nrow = n_d, ncol = n_d)
for (i in seq_len(n_d)) {
df_j <- df[-i, ]
m_j <- try(glmmTMB(form, data = df_j, family = binomial,
control = glmmTMBControl(
optCtrl = list(iter.max = 300, eval.max = 400))),
silent = TRUE)
if (!inherits(m_j, "try-error")) {
pj <- try(plogis(predict(m_j, newdata = df, type = "link",
re.form = NA,
allow.new.levels = TRUE)) * 100,
silent = TRUE)
if (!inherits(pj, "try-error") && length(pj) == n_d)
jack_mat[i, ] <- pj
}
}
vapply(seq_len(n_d), function(j) {
ev <- jack_mat[, j][!is.na(jack_mat[, j])]
if (length(ev) >= 2) {
tb <- mean(ev)
((length(ev) - 1) / length(ev)) * mean((ev - tb)^2)
} else {
(df$RSE[j] / 100 * df$y_glmm[j])^2 # fallback per-domain
}
}, numeric(1))
}, error = function(e) {
# Jackknife crash total → fallback ke MSE empiris semua domain
message(" Jackknife crash: ", e$message, " → fallback MSE empiris")
(df$RSE / 100 * df$y_glmm)^2
})
df$mse_glmm <- pmax(mse_v, 0)
}
df$RSE_direct <- df$RSE
df$RMSE_glmm <- sqrt(df$mse_glmm)
df$RSE_glmm <- df$RMSE_glmm / pmax(df$y_glmm, 1e-6) * 100
df
}#' EB Beta-Binomial dengan covariate:
#' (1) Regresi logit(p) ~ vars → mu_i (domain-specific prior mean)
#' (2) Estimasi phi (precision) via MOM dari {p_i}
#' (3) EB: p_hat_i = (a_i + y_i) / (a_i + b_i + n_eff_i) x 100
#' (4) Var posterior analytik → RSE_eb
run_eb_beta <- function(df, vars) {
eps <- 1e-4
df$y_prop <- pmax(pmin(df$Estimasi / 100, 1 - eps), eps)
se_prop <- pmax(df$RSE / 100 * df$y_prop, eps)
df$n_eff <- pmax(as.integer(round(df$y_prop * (1 - df$y_prop) / se_prop^2)), 2L)
df$y_count <- pmin(as.integer(round(df$y_prop * df$n_eff)), df$n_eff)
df$y_logit_eb <- log(df$y_prop / (1 - df$y_prop))
df[vars] <- as.data.frame(scale(df[vars]))
df[vars][is.nan(as.matrix(df[vars]))] <- 0
# Regresi untuk mu_i (prior mean per domain)
form_reg <- as.formula(paste("y_logit_eb ~", paste(vars, collapse = "+")))
reg <- try(lm(form_reg, data = df), silent = TRUE)
mu_i <- if (inherits(reg, "try-error")) {
df$y_prop
} else {
pmax(pmin(plogis(fitted(reg)), 1 - eps), eps)
}
# MOM untuk phi (precision)
p_bar <- mean(df$y_prop)
var_p <- var(df$y_prop)
phi <- max((p_bar * (1 - p_bar) / var_p) - 1, 0.01)
a_i <- mu_i * phi
b_i <- (1 - mu_i) * phi
n_i <- df$n_eff
y_i <- df$y_count
# EB estimator
a_post <- a_i + y_i
b_post <- b_i + (n_i - y_i)
denom <- a_i + b_i + n_i
p_hat_eb <- a_post / denom
df$y_eb_pct <- p_hat_eb * 100
# Var posterior analytik (Beta conjugate)
var_post <- (a_post * b_post) / (denom^2 * (denom + 1))
df$var_eb_pct <- var_post * 100^2
df$RSE_direct <- df$RSE
df$RMSE_eb <- sqrt(df$var_eb_pct)
df$RSE_eb <- df$RMSE_eb / pmax(df$y_eb_pct, 1e-6) * 100
df
}run_hbbeta <- function(df, vars,
has_jags = get("has_jags", envir = .GlobalEnv)) {
if (!has_jags) {
df$y_hb_pct <- df$Estimasi; df$sd_hb <- NA_real_; df$RSE_hb <- df$RSE
return(df)
}
eps <- 1e-4
df$y_prop <- pmax(pmin(df$Estimasi / 100, 1 - eps), eps)
df[vars] <- as.data.frame(scale(df[vars]))
df[vars][is.nan(as.matrix(df[vars]))] <- 0
form_hb <- as.formula(paste("y_prop ~", paste(vars, collapse = "+")))
hb <- try(saeHB::Beta(
formula = form_hb, data = df,
iter.update = 15, iter.mcmc = 4000, thin = 10, burn.in = 1000
), silent = TRUE)
if (inherits(hb, "try-error") || is.null(hb$Est)) {
message(" HB Beta gagal → fallback direct")
df$y_hb_pct <- df$Estimasi; df$sd_hb <- NA_real_; df$RSE_hb <- df$RSE
} else {
df$y_hb_pct <- hb$Est$MEAN * 100
df$sd_hb <- hb$Est$SD
df$RSE_hb <- df$sd_hb / pmax(df$y_hb_pct / 100, 1e-6) * 100
}
df
}# ── Plot RSE line (2 metode) ─────────────────────────────────────────────
plot_rse <- function(data, col1, col2, lab1, lab2, title) {
miss <- setdiff(c(col1, col2), names(data))
if (length(miss) > 0) { message("Kolom tidak ada: ", paste(miss)); return(invisible(NULL)) }
df_plt <- data %>%
dplyr::select(Kako, all_of(c(col1, col2))) %>%
pivot_longer(-Kako, names_to = "Metode", values_to = "RSE") %>%
mutate(Metode = case_when(Metode == col1 ~ lab1, Metode == col2 ~ lab2, TRUE ~ Metode),
Kako = as.character(Kako)) %>%
filter(!is.na(RSE))
ggplot(df_plt, aes(x = Kako, y = RSE, group = Metode, color = Metode)) +
geom_line(linewidth = 0.8, alpha = 0.85) +
geom_point(size = 1.6) +
geom_hline(yintercept = 25, linetype = "dashed", color = "#c0392b", linewidth = 0.9) +
annotate("text", x = 1, y = 26.5, label = "Batas 25%", color = "#c0392b",
hjust = 0, size = 3) +
scale_color_manual(values = c("#2980b9", "#27ae60")) +
labs(title = title, x = "Kabupaten/Kota", y = "RSE (%)", color = NULL) +
theme_minimal(base_size = 11) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 6),
legend.position = "top", plot.title = element_text(face = "bold", size = 12))
}
# ── Plot HB Beta Posterior (dot + CI per domain) ─────────────────────────
plot_hb_posterior <- function(df, title, group_col = NULL) {
eps <- 1e-4
df_p <- df %>%
filter(!is.na(y_hb_pct) & !is.na(sd_hb)) %>%
mutate(
ci_lo = pmax((y_hb_pct / 100 - 1.96 * sd_hb) * 100, 0),
ci_hi = pmin((y_hb_pct / 100 + 1.96 * sd_hb) * 100, 100),
Domain = reorder(as.character(Kako), y_hb_pct)
)
if (!is.null(group_col) && group_col %in% names(df_p)) {
df_p$Grup <- df_p[[group_col]]
} else {
df_p$Grup <- "Semua Domain"
}
p <- ggplot(df_p, aes(x = Domain)) +
geom_errorbar(aes(ymin = ci_lo, ymax = ci_hi, color = Grup),
width = 0.3, alpha = 0.55) +
geom_point(aes(y = y_hb_pct, color = Grup), size = 2) +
geom_point(aes(y = Estimasi), color = "#c0392b", size = 1.2, alpha = 0.6, shape = 4) +
geom_hline(yintercept = mean(df_p$y_hb_pct, na.rm = TRUE),
linetype = "dashed", color = "#2ecc71", linewidth = 0.8) +
scale_color_manual(values = c("#2980b9","#e67e22","#8e44ad","#16a085")) +
labs(title = title,
subtitle = "Titik biru = HB | Error bar = 95% CI | Silang merah = Direct | Garis hijau = Rata-rata HB",
x = "Kabupaten/Kota (urut HB)", y = "Estimasi (%)", color = "Grup") +
theme_minimal(base_size = 10) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 5),
legend.position = "top",
plot.title = element_text(face = "bold", size = 12),
plot.subtitle = element_text(size = 9))
p
}
# ── Utilitas evaluasi ────────────────────────────────────────────────────
pct_ok <- function(x) round(mean(x < 25, na.rm = TRUE) * 100, 1)
pct_ok_15 <- function(x) round(mean(x < 15, na.rm = TRUE) * 100, 1)
cv_mean <- function(x) round(mean(x, na.rm = TRUE), 2)
tabel_rse <- function(data, var_rse, label) {
data.frame(
Skenario = label,
`RSE >= 25` = sum(data[[var_rse]] >= 25, na.rm = TRUE),
`RSE < 25` = sum(data[[var_rse]] < 25, na.rm = TRUE),
`% < 25%` = pct_ok(data[[var_rse]]),
`% < 15%` = pct_ok_15(data[[var_rse]]),
`CV Mean` = cv_mean(data[[var_rse]]),
check.names = FALSE
)
}
metrik <- function(est_model, est_direct, label) {
data.frame(
Skenario = label,
`RB (%)` = round(mean((est_model - est_direct) / est_direct, na.rm=TRUE)*100, 3),
`RMSE` = round(sqrt(mean((est_model - est_direct)^2, na.rm=TRUE)), 4),
check.names = FALSE
)
}
# ── Fungsi utama: jalankan model pada list segmen ────────────────────────
# aux_fn : seleksi_backward atau seleksi_topn
# model_fn: run_eblup / run_glmm / run_eb_beta / run_hbbeta
# y_col : "Estimasi" (EBLUP) atau "y_logit" (GLMM/EB/HB)
# Catatan: excl_cols tidak perlu dioper — kedua fungsi seleksi
# sudah auto-detect kolom non-X secara dinamis
run_on_segments <- function(seg_list, aux_fn, model_fn, y_col,
min_n = 4, ...) {
hasil <- list()
for (nm in names(seg_list)) {
df_s <- seg_list[[nm]]
if (nrow(df_s) < min_n) { message(" Skip ", nm, " (n=", nrow(df_s), ")"); next }
vars <- tryCatch(
aux_fn(df_s, y_col = y_col),
error = function(e) { message(" VarSel error [", nm, "]: ", e$message); character(0) }
)
if (length(vars) == 0) {
message(" Tidak ada var terpilih untuk ", nm, " — skip")
next
}
cat(" [", nm, "]", y_col, "| n =", nrow(df_s),
"| Vars:", paste(vars, collapse=", "), "\n")
res <- tryCatch(
model_fn(df_s, vars, ...),
error = function(e) {
message(" Model error [", nm, "]: ", e$message)
NULL
}
)
if (!is.null(res)) hasil[[nm]] <- res
}
if (length(hasil) == 0) return(NULL)
bind_rows(hasil)
}# RSE_direct_col: hanya untuk pembagian partisi, BUKAN kandidat aux
# Karena nama tidak berawalan "X", otomatis tereksklusi oleh kedua fungsi seleksi
df_base$RSE_direct_col <- sqrt(df_base$vardir) / pmax(df_base$Estimasi, 1e-6) * 100
brk_jenks <- classIntervals(df_base$RSE_direct_col, n = 2, style = "jenks")
df_base$grup_jenks <- cut(df_base$RSE_direct_col,
breaks = brk_jenks$brks,
labels = c("G1_RSE_Rendah", "G2_RSE_Tinggi"),
include.lowest = TRUE)
cat("Cut-off Jenks:", round(brk_jenks$brks[2], 2), "%\n")## Cut-off Jenks: 53.58 %
##
## G1_RSE_Rendah G2_RSE_Tinggi
## 62 19
med_rse <- median(df_base$RSE_direct_col, na.rm = TRUE)
df_base$grup_equal <- ifelse(df_base$RSE_direct_col <= med_rse,
"G1_RSE_Bawah", "G2_RSE_Atas")
cat("Median RSE:", round(med_rse, 2), "%\n")## Median RSE: 44.36 %
##
## G1_RSE_Bawah G2_RSE_Atas
## 41 40
# Fungsi helper cluster
make_cluster <- function(df_full, vars_cl, seed = 42) {
mat <- scale(df_full[, vars_cl, drop = FALSE])
mat[is.nan(mat)] <- 0
set.seed(seed)
km <- kmeans(mat, centers = 2, nstart = 50, iter.max = 200)
paste0("Cluster_", km$cluster)
}
# === Global var selection (ALL data) untuk menentukan vars clustering ===
# A1 (Backward) — Normal link
vars_gbk_normal <- seleksi_backward(df_base, y_col = "Estimasi")
cat("Cluster-A1-Normal vars:", paste(vars_gbk_normal, collapse=", "), "\n")## Cluster-A1-Normal vars: X45, X28
df_base$cluster_bk_normal <- make_cluster(df_base, vars_gbk_normal)
# A2 (TopN) — Normal link
vars_gtn_normal <- seleksi_topn(df_base, y_col = "Estimasi")
cat("Cluster-A2-Normal vars:", paste(vars_gtn_normal, collapse=", "), "\n")## Cluster-A2-Normal vars: X45, X8, X36_mean, X28, X29_mean, X30_mean, X31_mean
df_base$cluster_tn_normal <- make_cluster(df_base, vars_gtn_normal)
# A1 (Backward) — Logit link
vars_gbk_logit <- seleksi_backward(df_base, y_col = "y_logit")
cat("Cluster-A1-Logit vars:", paste(vars_gbk_logit, collapse=", "), "\n")## Cluster-A1-Logit vars: X8, X45, X44
df_base$cluster_bk_logit <- make_cluster(df_base, vars_gbk_logit)
# A2 (TopN) — Logit link
vars_gtn_logit <- seleksi_topn(df_base, y_col = "y_logit")
cat("Cluster-A2-Logit vars:", paste(vars_gtn_logit, collapse=", "), "\n")## Cluster-A2-Logit vars: X8, X45, X44, X39, X36_mean, X30_mean, X28, X1
df_base$cluster_tn_logit <- make_cluster(df_base, vars_gtn_logit)
# Ringkasan cluster
kable(
data.frame(
Cluster_Tipe = c("A1-Normal","A2-Normal","A1-Logit","A2-Logit"),
Variabel_Global = c(paste(vars_gbk_normal, collapse=", "),
paste(vars_gtn_normal, collapse=", "),
paste(vars_gbk_logit, collapse=", "),
paste(vars_gtn_logit, collapse=", "))
), caption = "Variabel Global untuk Pembentukan Cluster") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed"))| Cluster_Tipe | Variabel_Global |
|---|---|
| A1-Normal | X45, X28 |
| A2-Normal | X45, X8, X36_mean, X28, X29_mean, X30_mean, X31_mean |
| A1-Logit | X8, X45, X44 |
| A2-Logit | X8, X45, X44, X39, X36_mean, X30_mean, X28, X1 |
p1 <- ggplot(df_base, aes(x = RSE_direct_col, fill = grup_jenks)) +
geom_histogram(bins = 25, color = "white", alpha = 0.85) +
geom_vline(xintercept = brk_jenks$brks[2], linetype="dashed", color="#c0392b") +
scale_fill_manual(values = c("#2980b9","#e67e22")) +
labs(title = "C2: RSE Natural Break (Jenks)", x = "RSE Direct (%)", y = "n", fill=NULL) +
theme_minimal(base_size = 11) + theme(legend.position = "top")
p2 <- ggplot(df_base, aes(x = RSE_direct_col, fill = grup_equal)) +
geom_histogram(bins = 25, color = "white", alpha = 0.85) +
geom_vline(xintercept = med_rse, linetype="dashed", color="#c0392b") +
scale_fill_manual(values = c("#2980b9","#e67e22")) +
labs(title = "C3: RSE Equal Size (Median)", x = "RSE Direct (%)", y = "n", fill=NULL) +
theme_minimal(base_size = 11) + theme(legend.position = "top")
p3 <- ggplot(df_base, aes(x = Estimasi, fill = cluster_bk_normal)) +
geom_boxplot(alpha = 0.8) +
scale_fill_manual(values = c("#2980b9","#27ae60")) +
labs(title = "C4 Cluster (A1-Normal)", x = "Estimasi (%)", fill=NULL) +
theme_minimal(base_size = 11) + theme(legend.position = "top")
p4 <- ggplot(df_base, aes(x = Estimasi, fill = cluster_bk_logit)) +
geom_boxplot(alpha = 0.8) +
scale_fill_manual(values = c("#2980b9","#27ae60")) +
labs(title = "C4 Cluster (A1-Logit)", x = "Estimasi (%)", fill=NULL) +
theme_minimal(base_size = 11) + theme(legend.position = "top")
(p1 | p2) / (p3 | p4)Skenario 1–8 — Model EBLUP Fay-Herriot pada skala normal. Skenario 1–4: seleksi Backward (A1). Skenario 5–8: seleksi Top-n/10 (A2). Masing-masing 4 partisi: All, RSE-NB, RSE-ES, Cluster.
## === Skenario 1: EBLUP Backward All ===
df_s1 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_backward,
model_fn = run_eblup,
y_col = "Estimasi"
)## [ All ] Estimasi | n = 81 | Vars: X45, X28
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.0836 Min. : 22.9 Min. :21.7
## 1st Qu.: 6.18 1st Qu.: 6.8055 1st Qu.: 38.3 1st Qu.:25.9
## Median : 9.32 Median : 8.8513 Median : 44.4 Median :28.2
## Mean :10.08 Mean : 8.2477 Mean : 47.6 Mean :32.6
## 3rd Qu.:13.93 3rd Qu.:10.1615 3rd Qu.: 53.5 3rd Qu.:32.3
## Max. :24.05 Max. :13.6089 Max. :101.7 Max. :97.3
S1: RSE Direct vs EBLUP Backward (All)
## === Skenario 2: EBLUP Backward RSE-NB ===
df_s2 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_backward, model_fn = run_eblup, y_col = "Estimasi"
)## [ G1_RSE_Rendah ] Estimasi | n = 62 | Vars: X30, X8, X36_mean, X19_mean, X27_mean
## [ G2_RSE_Tinggi ] Estimasi | n = 19 | Vars: X45_mean
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.087 Min. : 22.9 Min. : 10.6
## 1st Qu.: 6.18 1st Qu.: 4.060 1st Qu.: 38.3 1st Qu.: 13.7
## Median : 9.32 Median : 9.035 Median : 44.4 Median : 17.7
## Mean :10.08 Mean : 8.102 Mean : 47.6 Mean : 27.4
## 3rd Qu.:13.93 3rd Qu.:11.322 3rd Qu.: 53.5 3rd Qu.: 42.4
## Max. :24.05 Max. :15.076 Max. :101.7 Max. :108.7
plot_rse(df_s2,"RSE_direct","RSE_eblup","Direct","EBLUP-BK RSE-NB","S2: EBLUP Backward — RSE Natural Break")S2: EBLUP Backward RSE Natural Break
## === Skenario 3: EBLUP Backward RSE-ES ===
df_s3 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_backward, model_fn = run_eblup, y_col = "Estimasi"
)## [ G1_RSE_Bawah ] Estimasi | n = 41 | Vars: X19, X27_jumlah
## [ G2_RSE_Atas ] Estimasi | n = 40 | Vars: X45, X12_mean
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.0847 Min. : 22.9 Min. :15.4
## 1st Qu.: 6.18 1st Qu.: 6.0388 1st Qu.: 38.3 1st Qu.:18.0
## Median : 9.32 Median : 7.5083 Median : 44.4 Median :30.4
## Mean :10.08 Mean : 8.4013 Mean : 47.6 Mean :31.2
## 3rd Qu.:13.93 3rd Qu.:12.3065 3rd Qu.: 53.5 3rd Qu.:34.7
## Max. :24.05 Max. :16.0132 Max. :101.7 Max. :95.9
plot_rse(df_s3,"RSE_direct","RSE_eblup","Direct","EBLUP-BK RSE-ES","S3: EBLUP Backward — RSE Equal Size")S3: EBLUP Backward RSE Equal Size
## === Skenario 4: EBLUP Backward Cluster ===
df_s4 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_bk_normal),
aux_fn = seleksi_backward, model_fn = run_eblup, y_col = "Estimasi"
)## [ Cluster_1 ] Estimasi | n = 18 | Vars: X39
## [ Cluster_2 ] Estimasi | n = 63 | Vars: X29_jumlah, X25_jumlah
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.0849 Min. : 22.9 Min. :20.6
## 1st Qu.: 6.18 1st Qu.: 6.2321 1st Qu.: 38.3 1st Qu.:25.6
## Median : 9.32 Median : 8.8138 Median : 44.4 Median :28.7
## Mean :10.08 Mean : 8.3312 Mean : 47.6 Mean :33.6
## 3rd Qu.:13.93 3rd Qu.:10.6945 3rd Qu.: 53.5 3rd Qu.:36.1
## Max. :24.05 Max. :13.3972 Max. :101.7 Max. :95.8
plot_rse(df_s4,"RSE_direct","RSE_eblup","Direct","EBLUP-BK Cluster","S4: EBLUP Backward — Cluster Aux")S4: EBLUP Backward Cluster-Aux
## === Skenario 5: EBLUP TopN All ===
df_s5 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_topn, model_fn = run_eblup, y_col = "Estimasi"
)## [ All ] Estimasi | n = 81 | Vars: X45, X8, X36_mean, X28, X29_mean, X30_mean, X31_mean
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.082 Min. : 22.9 Min. :22.6
## 1st Qu.: 6.18 1st Qu.: 6.731 1st Qu.: 38.3 1st Qu.:26.4
## Median : 9.32 Median : 9.065 Median : 44.4 Median :28.7
## Mean :10.08 Mean : 8.331 Mean : 47.6 Mean :33.8
## 3rd Qu.:13.93 3rd Qu.:10.467 3rd Qu.: 53.5 3rd Qu.:33.2
## Max. :24.05 Max. :13.562 Max. :101.7 Max. :99.2
S5: EBLUP Top-n/10 All
## === Skenario 6: EBLUP TopN RSE-NB ===
df_s6 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_topn, model_fn = run_eblup, y_col = "Estimasi"
)## [ G1_RSE_Rendah ] Estimasi | n = 62 | Vars: X30, X8, X36_mean, X29_mean, X19_mean, X27_mean
## [ G2_RSE_Tinggi ] Estimasi | n = 19 | Vars: X45_mean
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.087 Min. : 22.9 Min. : 11.4
## 1st Qu.: 6.18 1st Qu.: 4.154 1st Qu.: 38.3 1st Qu.: 14.5
## Median : 9.32 Median : 8.954 Median : 44.4 Median : 18.9
## Mean :10.08 Mean : 8.143 Mean : 47.6 Mean : 28.5
## 3rd Qu.:13.93 3rd Qu.:11.252 3rd Qu.: 53.5 3rd Qu.: 42.4
## Max. :24.05 Max. :15.393 Max. :101.7 Max. :108.7
plot_rse(df_s6,"RSE_direct","RSE_eblup","Direct","EBLUP-TN RSE-NB","S6: EBLUP Top-n/10 — RSE Natural Break")S6: EBLUP Top-n/10 RSE Natural Break
## === Skenario 7: EBLUP TopN RSE-ES ===
df_s7 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_topn, model_fn = run_eblup, y_col = "Estimasi"
)## [ G1_RSE_Bawah ] Estimasi | n = 41 | Vars: X29_jumlah, X19, X10_jumlah, X27_jumlah
## [ G2_RSE_Atas ] Estimasi | n = 40 | Vars: X45, X30_mean, X12_mean, X11_mean
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.0841 Min. : 22.9 Min. :16.5
## 1st Qu.: 6.18 1st Qu.: 6.0042 1st Qu.: 38.3 1st Qu.:20.2
## Median : 9.32 Median : 7.5411 Median : 44.4 Median :31.4
## Mean :10.08 Mean : 8.5056 Mean : 47.6 Mean :33.0
## 3rd Qu.:13.93 3rd Qu.:12.3161 3rd Qu.: 53.5 3rd Qu.:36.5
## Max. :24.05 Max. :16.2031 Max. :101.7 Max. :96.7
plot_rse(df_s7,"RSE_direct","RSE_eblup","Direct","EBLUP-TN RSE-ES","S7: EBLUP Top-n/10 — RSE Equal Size")S7: EBLUP Top-n/10 RSE Equal Size
## === Skenario 8: EBLUP TopN Cluster ===
df_s8 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_tn_normal),
aux_fn = seleksi_topn, model_fn = run_eblup, y_col = "Estimasi"
)## [ Cluster_1 ] Estimasi | n = 69 | Vars: X10_jumlah, X25_jumlah, X11_jumlah, X27_jumlah, X36_jumlah, X29_jumlah
## [ Cluster_2 ] Estimasi | n = 12 | Vars: X13_jumlah
## Estimasi y_eblup RSE_direct RSE_eblup
## Min. : 0.08 Min. : 0.084 Min. : 22.9 Min. :22.2
## 1st Qu.: 6.18 1st Qu.: 6.625 1st Qu.: 38.3 1st Qu.:28.1
## Median : 9.32 Median : 8.864 Median : 44.4 Median :31.8
## Mean :10.08 Mean : 8.430 Mean : 47.6 Mean :35.8
## 3rd Qu.:13.93 3rd Qu.:10.957 3rd Qu.: 53.5 3rd Qu.:39.6
## Max. :24.05 Max. :13.700 Max. :101.7 Max. :96.9
plot_rse(df_s8,"RSE_direct","RSE_eblup","Direct","EBLUP-TN Cluster","S8: EBLUP Top-n/10 — Cluster Aux")S8: EBLUP Top-n/10 Cluster-Aux
Skenario 9–16 — GLMM binomial via
glmmTMB, random intercept per domain (1|Kako).
Proporsi dikonversi ke count efektif n_eff = p(1-p)/SE².
MSE via Jackknife leave-one-domain-out. Seleksi variabel pada skala
logit.
## === Skenario 9: GLMM Backward All ===
df_s9 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_backward, model_fn = run_glmm, y_col = "y_logit"
)## [ All ] y_logit | n = 81 | Vars: X8, X45, X44
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.525 Min. : 22.9 Min. : 0.515
## 1st Qu.: 6.18 1st Qu.: 6.539 1st Qu.: 38.3 1st Qu.: 1.026
## Median : 9.32 Median : 9.803 Median : 44.4 Median : 1.402
## Mean :10.08 Mean : 9.241 Mean : 47.6 Mean : 2.231
## 3rd Qu.:13.93 3rd Qu.:11.928 3rd Qu.: 53.5 3rd Qu.: 1.895
## Max. :24.05 Max. :19.039 Max. :101.7 Max. :28.091
S9: GLMM Backward All
## === Skenario 10: GLMM Backward RSE-NB ===
df_s10 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_backward, model_fn = run_glmm, y_col = "y_logit"
)## [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X29_mean
## [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.269 Min. : 22.9 Min. : 0.686
## 1st Qu.: 6.18 1st Qu.: 5.940 1st Qu.: 38.3 1st Qu.: 0.877
## Median : 9.32 Median :10.374 Median : 44.4 Median : 0.960
## Mean :10.08 Mean : 9.116 Mean : 47.6 Mean : 3.494
## 3rd Qu.:13.93 3rd Qu.:12.015 3rd Qu.: 53.5 3rd Qu.: 2.570
## Max. :24.05 Max. :14.088 Max. :101.7 Max. :62.014
plot_rse(df_s10,"RSE_direct","RSE_glmm","Direct","GLMM-BK RSE-NB","S10: GLMM Backward — RSE Natural Break")S10: GLMM Backward RSE Natural Break
## === Skenario 11: GLMM Backward RSE-ES ===
df_s11 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_backward, model_fn = run_glmm, y_col = "y_logit"
)## [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah
## [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.474 Min. : 22.9 Min. : 1.01
## 1st Qu.: 6.18 1st Qu.: 6.281 1st Qu.: 38.3 1st Qu.: 1.35
## Median : 9.32 Median : 8.597 Median : 44.4 Median : 2.01
## Mean :10.08 Mean : 9.164 Mean : 47.6 Mean : 2.93
## 3rd Qu.:13.93 3rd Qu.:13.092 3rd Qu.: 53.5 3rd Qu.: 2.70
## Max. :24.05 Max. :16.780 Max. :101.7 Max. :32.97
plot_rse(df_s11,"RSE_direct","RSE_glmm","Direct","GLMM-BK RSE-ES","S11: GLMM Backward — RSE Equal Size")S11: GLMM Backward RSE Equal Size
## === Skenario 12: GLMM Backward Cluster ===
df_s12 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_bk_logit),
aux_fn = seleksi_backward, model_fn = run_glmm, y_col = "y_logit"
)## [ Cluster_1 ] y_logit | n = 10 | Vars: X30
## [ Cluster_2 ] y_logit | n = 71 | Vars: X45_mean, X25_jumlah, X33_mean
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.155 Min. : 22.9 Min. : 0.801
## 1st Qu.: 6.18 1st Qu.: 7.112 1st Qu.: 38.3 1st Qu.: 1.154
## Median : 9.32 Median : 9.754 Median : 44.4 Median : 1.631
## Mean :10.08 Mean : 9.306 Mean : 47.6 Mean : 5.832
## 3rd Qu.:13.93 3rd Qu.:11.721 3rd Qu.: 53.5 3rd Qu.: 2.441
## Max. :24.05 Max. :16.703 Max. :101.7 Max. :189.506
plot_rse(df_s12,"RSE_direct","RSE_glmm","Direct","GLMM-BK Cluster","S12: GLMM Backward — Cluster Aux")S12: GLMM Backward Cluster-Aux
## === Skenario 13: GLMM TopN All ===
df_s13 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_topn, model_fn = run_glmm, y_col = "y_logit"
)## [ All ] y_logit | n = 81 | Vars: X8, X45, X44, X39, X36_mean, X30_mean, X28, X1
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.501 Min. : 22.9 Min. : 1.43
## 1st Qu.: 6.18 1st Qu.: 6.417 1st Qu.: 38.3 1st Qu.: 2.15
## Median : 9.32 Median : 9.777 Median : 44.4 Median : 2.75
## Mean :10.08 Mean : 9.253 Mean : 47.6 Mean : 5.50
## 3rd Qu.:13.93 3rd Qu.:11.880 3rd Qu.: 53.5 3rd Qu.: 3.96
## Max. :24.05 Max. :18.607 Max. :101.7 Max. :141.02
S13: GLMM Top-n/10 All
## === Skenario 14: GLMM TopN RSE-NB ===
df_s14 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_topn, model_fn = run_glmm, y_col = "y_logit"
)## [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X36_mean, X29_mean, X31_mean, X26
## [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.269 Min. : 22.9 Min. : 0.892
## 1st Qu.: 6.18 1st Qu.: 5.882 1st Qu.: 38.3 1st Qu.: 1.189
## Median : 9.32 Median :10.249 Median : 44.4 Median : 1.702
## Mean :10.08 Mean : 9.107 Mean : 47.6 Mean : 4.077
## 3rd Qu.:13.93 3rd Qu.:12.220 3rd Qu.: 53.5 3rd Qu.: 3.276
## Max. :24.05 Max. :14.690 Max. :101.7 Max. :62.014
plot_rse(df_s14,"RSE_direct","RSE_glmm","Direct","GLMM-TN RSE-NB","S14: GLMM Top-n/10 — RSE Natural Break")S14: GLMM Top-n/10 RSE Natural Break
## === Skenario 15: GLMM TopN RSE-ES ===
df_s15 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_topn, model_fn = run_glmm, y_col = "y_logit"
)## [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah, X10_jumlah, X8
## [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45, X8, X1, X40_jumlah
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.471 Min. : 22.9 Min. : 1.01
## 1st Qu.: 6.18 1st Qu.: 6.358 1st Qu.: 38.3 1st Qu.: 1.52
## Median : 9.32 Median : 8.997 Median : 44.4 Median : 2.74
## Mean :10.08 Mean : 9.208 Mean : 47.6 Mean : 4.21
## 3rd Qu.:13.93 3rd Qu.:12.115 3rd Qu.: 53.5 3rd Qu.: 3.96
## Max. :24.05 Max. :17.228 Max. :101.7 Max. :61.53
plot_rse(df_s15,"RSE_direct","RSE_glmm","Direct","GLMM-TN RSE-ES","S15: GLMM Top-n/10 — RSE Equal Size")S15: GLMM Top-n/10 RSE Equal Size
## === Skenario 16: GLMM TopN Cluster ===
df_s16 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_tn_logit),
aux_fn = seleksi_topn, model_fn = run_glmm, y_col = "y_logit"
)## [ Cluster_1 ] y_logit | n = 67 | Vars: X45, X25_jumlah, X11_mean, X26_mean, X43_mean, X10_jumlah
## [ Cluster_2 ] y_logit | n = 14 | Vars: X30
## Estimasi y_glmm RSE_direct RSE_glmm
## Min. : 0.08 Min. : 0.168 Min. : 22.9 Min. : 0.976
## 1st Qu.: 6.18 1st Qu.: 7.509 1st Qu.: 38.3 1st Qu.: 1.761
## Median : 9.32 Median : 9.628 Median : 44.4 Median : 2.437
## Mean :10.08 Mean : 9.327 Mean : 47.6 Mean : 7.994
## 3rd Qu.:13.93 3rd Qu.:11.749 3rd Qu.: 53.5 3rd Qu.: 4.221
## Max. :24.05 Max. :16.115 Max. :101.7 Max. :281.082
plot_rse(df_s16,"RSE_direct","RSE_glmm","Direct","GLMM-TN Cluster","S16: GLMM Top-n/10 — Cluster Aux")S16: GLMM Top-n/10 Cluster-Aux
Skenario 17–24 — EB Beta-Binomial. Regresi
logit(p) ~ vars menghasilkan μᵢ (prior mean
domain-spesifik). Presisi φ diestimasi via Method of
Moments. Estimasi EB: p̂ᵢ = (aᵢ + yᵢ) / (aᵢ + bᵢ + nᵢ). MSE
dari variansi posterior Beta analytik.
## === Skenario 17: EB Backward All ===
df_s17 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)## [ All ] y_logit | n = 81 | Vars: X8, X45, X44
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.134 Min. : 22.9 Min. :21.0
## 1st Qu.: 6.18 1st Qu.: 6.668 1st Qu.: 38.3 1st Qu.:31.5
## Median : 9.32 Median : 9.905 Median : 44.4 Median :35.3
## Mean :10.08 Mean : 9.410 Mean : 47.6 Mean :37.7
## 3rd Qu.:13.93 3rd Qu.:12.489 3rd Qu.: 53.5 3rd Qu.:40.7
## Max. :24.05 Max. :19.161 Max. :101.7 Max. :83.7
S17: EB Beta Backward All
## === Skenario 18: EB Backward RSE-NB ===
df_s18 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)## [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X29_mean
## [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.106 Min. : 22.9 Min. :20.1
## 1st Qu.: 6.18 1st Qu.: 6.688 1st Qu.: 38.3 1st Qu.:28.7
## Median : 9.32 Median : 9.823 Median : 44.4 Median :31.6
## Mean :10.08 Mean : 9.486 Mean : 47.6 Mean :36.3
## 3rd Qu.:13.93 3rd Qu.:12.608 3rd Qu.: 53.5 3rd Qu.:40.7
## Max. :24.05 Max. :18.708 Max. :101.7 Max. :87.1
plot_rse(df_s18,"RSE_direct","RSE_eb","Direct","EB-BK RSE-NB","S18: EB Beta-Binomial Backward — RSE NB")S18: EB Beta Backward RSE Natural Break
## === Skenario 19: EB Backward RSE-ES ===
df_s19 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)## [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah
## [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.183 Min. : 22.9 Min. :20.1
## 1st Qu.: 6.18 1st Qu.: 6.204 1st Qu.: 38.3 1st Qu.:27.5
## Median : 9.32 Median : 8.313 Median : 44.4 Median :33.4
## Mean :10.08 Mean : 9.469 Mean : 47.6 Mean :35.4
## 3rd Qu.:13.93 3rd Qu.:13.217 3rd Qu.: 53.5 3rd Qu.:39.9
## Max. :24.05 Max. :19.118 Max. :101.7 Max. :77.3
plot_rse(df_s19,"RSE_direct","RSE_eb","Direct","EB-BK RSE-ES","S19: EB Beta-Binomial Backward — RSE ES")S19: EB Beta Backward RSE Equal Size
## === Skenario 20: EB Backward Cluster ===
df_s20 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_bk_logit),
aux_fn = seleksi_backward, model_fn = run_eb_beta, y_col = "y_logit"
)## [ Cluster_1 ] y_logit | n = 10 | Vars: X30
## [ Cluster_2 ] y_logit | n = 71 | Vars: X45_mean, X25_jumlah, X33_mean
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.0873 Min. : 22.9 Min. :21.4
## 1st Qu.: 6.18 1st Qu.: 6.5408 1st Qu.: 38.3 1st Qu.:30.4
## Median : 9.32 Median : 9.7703 Median : 44.4 Median :33.9
## Mean :10.08 Mean : 9.4931 Mean : 47.6 Mean :36.4
## 3rd Qu.:13.93 3rd Qu.:12.3782 3rd Qu.: 53.5 3rd Qu.:38.0
## Max. :24.05 Max. :17.7431 Max. :101.7 Max. :95.8
plot_rse(df_s20,"RSE_direct","RSE_eb","Direct","EB-BK Cluster","S20: EB Beta-Binomial Backward — Cluster")S20: EB Beta Backward Cluster-Aux
## === Skenario 21: EB TopN All ===
df_s21 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)## [ All ] y_logit | n = 81 | Vars: X8, X45, X44, X39, X36_mean, X30_mean, X28, X1
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.122 Min. : 22.9 Min. :21.2
## 1st Qu.: 6.18 1st Qu.: 6.516 1st Qu.: 38.3 1st Qu.:31.4
## Median : 9.32 Median : 9.938 Median : 44.4 Median :35.2
## Mean :10.08 Mean : 9.458 Mean : 47.6 Mean :37.7
## 3rd Qu.:13.93 3rd Qu.:12.728 3rd Qu.: 53.5 3rd Qu.:41.1
## Max. :24.05 Max. :18.843 Max. :101.7 Max. :82.7
S21: EB Beta Top-n/10 All
## === Skenario 22: EB TopN RSE-NB ===
df_s22 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)## [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X36_mean, X29_mean, X31_mean, X26
## [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.106 Min. : 22.9 Min. :20.2
## 1st Qu.: 6.18 1st Qu.: 6.688 1st Qu.: 38.3 1st Qu.:28.7
## Median : 9.32 Median : 9.829 Median : 44.4 Median :31.5
## Mean :10.08 Mean : 9.488 Mean : 47.6 Mean :36.3
## 3rd Qu.:13.93 3rd Qu.:12.589 3rd Qu.: 53.5 3rd Qu.:41.2
## Max. :24.05 Max. :18.584 Max. :101.7 Max. :87.1
plot_rse(df_s22,"RSE_direct","RSE_eb","Direct","EB-TN RSE-NB","S22: EB Beta-Binomial Top-n/10 — RSE NB")S22: EB Beta Top-n/10 RSE Natural Break
## === Skenario 23: EB TopN RSE-ES ===
df_s23 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)## [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah, X10_jumlah, X8
## [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45, X8, X1, X40_jumlah
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.159 Min. : 22.9 Min. :19.9
## 1st Qu.: 6.18 1st Qu.: 6.607 1st Qu.: 38.3 1st Qu.:27.5
## Median : 9.32 Median : 8.383 Median : 44.4 Median :32.7
## Mean :10.08 Mean : 9.541 Mean : 47.6 Mean :35.3
## 3rd Qu.:13.93 3rd Qu.:13.269 3rd Qu.: 53.5 3rd Qu.:40.5
## Max. :24.05 Max. :19.361 Max. :101.7 Max. :76.9
plot_rse(df_s23,"RSE_direct","RSE_eb","Direct","EB-TN RSE-ES","S23: EB Beta-Binomial Top-n/10 — RSE ES")S23: EB Beta Top-n/10 RSE Equal Size
## === Skenario 24: EB TopN Cluster ===
df_s24 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_tn_logit),
aux_fn = seleksi_topn, model_fn = run_eb_beta, y_col = "y_logit"
)## [ Cluster_1 ] y_logit | n = 67 | Vars: X45, X25_jumlah, X11_mean, X26_mean, X43_mean, X10_jumlah
## [ Cluster_2 ] y_logit | n = 14 | Vars: X30
## Estimasi y_eb_pct RSE_direct RSE_eb
## Min. : 0.08 Min. : 0.0893 Min. : 22.9 Min. :20.8
## 1st Qu.: 6.18 1st Qu.: 5.9527 1st Qu.: 38.3 1st Qu.:30.4
## Median : 9.32 Median : 9.6409 Median : 44.4 Median :33.6
## Mean :10.08 Mean : 9.4755 Mean : 47.6 Mean :36.4
## 3rd Qu.:13.93 3rd Qu.:12.0506 3rd Qu.: 53.5 3rd Qu.:38.4
## Max. :24.05 Max. :18.3517 Max. :101.7 Max. :95.1
plot_rse(df_s24,"RSE_direct","RSE_eb","Direct","EB-TN Cluster","S24: EB Beta-Binomial Top-n/10 — Cluster")S24: EB Beta Top-n/10 Cluster-Aux
Prasyarat JAGS: Skenario 25–32 memerlukan JAGS
terinstal di OS dan paket saeHB. Download: https://sourceforge.net/projects/mcmc-jags/files/JAGS/4.x/
lalu restart R. Jika JAGS tidak tersedia, otomatis fallback ke direct
estimator. Setiap skenario HB menyertakan plot
posterior dengan credible interval 95%.
## === Skenario 25: HB Backward All ===
df_s25 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ All ] y_logit | n = 81 | Vars: X8, X45, X44
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.3058 0.0150 -2.3363 -2.3162 -2.3073 -2.2950 -2.27
## X8 0.3289 0.0232 0.2803 0.3148 0.3282 0.3448 0.37
## X45 -0.2679 0.0224 -0.3073 -0.2826 -0.2689 -0.2526 -0.22
## X44 0.1447 0.0216 0.1031 0.1297 0.1448 0.1570 0.19
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 2.14 Min. : 22.9 Min. :18.7
## 1st Qu.: 6.18 1st Qu.: 7.67 1st Qu.: 38.3 1st Qu.:22.0
## Median : 9.32 Median :10.10 Median : 44.4 Median :24.4
## Mean :10.08 Mean :10.13 Mean : 47.6 Mean :25.1
## 3rd Qu.:13.93 3rd Qu.:12.70 3rd Qu.: 53.5 3rd Qu.:26.4
## Max. :24.05 Max. :18.58 Max. :101.7 Max. :36.0
S25: HB Beta Backward RSE
S25: HB Beta Posterior — All Domain
## === Skenario 26: HB Backward RSE-NB ===
df_s26 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X29_mean
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.1075 0.0136 -2.1339 -2.1165 -2.1068 -2.0985 -2.08
## X8 0.1669 0.0158 0.1349 0.1570 0.1668 0.1778 0.20
## X29_mean -0.1367 0.0135 -0.1636 -0.1454 -0.1367 -0.1270 -0.11
## [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -3.2210 0.0480 -3.3032 -3.2565 -3.2222 -3.1878 -3.12
## X8 0.5560 0.0441 0.4721 0.5255 0.5571 0.5840 0.65
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 1.13 Min. : 22.9 Min. : 4.84
## 1st Qu.: 6.18 1st Qu.: 6.13 1st Qu.: 38.3 1st Qu.: 6.66
## Median : 9.32 Median : 9.08 Median : 44.4 Median : 8.38
## Mean :10.08 Mean :10.09 Mean : 47.6 Mean :14.24
## 3rd Qu.:13.93 3rd Qu.:13.76 3rd Qu.: 53.5 3rd Qu.:12.23
## Max. :24.05 Max. :23.66 Max. :101.7 Max. :45.98
S26: HB Beta Backward RSE Natural Break
S26: HB Beta Posterior — RSE Natural Break
## === Skenario 27: HB Backward RSE-ES ===
df_s27 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -1.9493 0.0151 -1.9770 -1.9602 -1.9491 -1.9393 -1.92
## X19 -0.2239 0.0164 -0.2576 -0.2358 -0.2228 -0.2136 -0.19
## X29_jumlah -0.2052 0.0159 -0.2338 -0.2180 -0.2065 -0.1936 -0.17
## [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.7097 0.0217 -2.7477 -2.7258 -2.7103 -2.6926 -2.67
## X45 -0.4698 0.0237 -0.5167 -0.4853 -0.4692 -0.4553 -0.42
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 1.83 Min. : 22.9 Min. : 9.04
## 1st Qu.: 6.18 1st Qu.: 6.78 1st Qu.: 38.3 1st Qu.:12.42
## Median : 9.32 Median : 8.60 Median : 44.4 Median :18.55
## Mean :10.08 Mean :10.14 Mean : 47.6 Mean :17.74
## 3rd Qu.:13.93 3rd Qu.:13.46 3rd Qu.: 53.5 3rd Qu.:21.18
## Max. :24.05 Max. :23.01 Max. :101.7 Max. :29.50
S27: HB Beta Backward RSE Equal Size
S27: HB Beta Posterior — RSE Equal Size
## === Skenario 28: HB Backward Cluster ===
df_s28 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_bk_logit),
aux_fn = seleksi_backward, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ Cluster_1 ] y_logit | n = 10 | Vars: X30
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -3.4966 0.0734 -3.6398 -3.5469 -3.4928 -3.4427 -3.38
## X30 -0.6634 0.0785 -0.8073 -0.7180 -0.6647 -0.6077 -0.52
## [ Cluster_2 ] y_logit | n = 71 | Vars: X45_mean, X25_jumlah, X33_mean
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.1843 0.0168 -2.2140 -2.1959 -2.1848 -2.1734 -2.15
## X45_mean -0.1498 0.0187 -0.1859 -0.1624 -0.1504 -0.1352 -0.11
## X25_jumlah -0.1359 0.0174 -0.1668 -0.1479 -0.1361 -0.1237 -0.10
## X33_mean -0.1479 0.0188 -0.1841 -0.1599 -0.1487 -0.1350 -0.11
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 0.586 Min. : 22.9 Min. :16.1
## 1st Qu.: 6.18 1st Qu.: 7.259 1st Qu.: 38.3 1st Qu.:20.2
## Median : 9.32 Median : 9.620 Median : 44.4 Median :22.7
## Mean :10.08 Mean :10.076 Mean : 47.6 Mean :24.4
## 3rd Qu.:13.93 3rd Qu.:12.465 3rd Qu.: 53.5 3rd Qu.:25.7
## Max. :24.05 Max. :20.724 Max. :101.7 Max. :51.7
S28: HB Beta Backward Cluster-Aux
S28: HB Beta Posterior — Cluster Aux (A1-Logit)
## === Skenario 29: HB TopN All ===
df_s29 <- run_on_segments(
seg_list = list("All" = df_base),
aux_fn = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ All ] y_logit | n = 81 | Vars: X8, X45, X44, X39, X36_mean, X30_mean, X28, X1
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.3051 0.0157 -2.3378 -2.3154 -2.3036 -2.2944 -2.28
## X8 0.2939 0.0278 0.2439 0.2730 0.2927 0.3142 0.35
## X45 -0.2487 0.0227 -0.2920 -0.2640 -0.2490 -0.2312 -0.21
## X44 0.3194 0.0255 0.2718 0.3009 0.3213 0.3361 0.37
## X39 -0.0337 0.0209 -0.0732 -0.0475 -0.0339 -0.0195 0.01
## X36_mean -0.1978 0.0288 -0.2518 -0.2167 -0.1961 -0.1771 -0.14
## X30_mean 0.0612 0.0239 0.0154 0.0458 0.0622 0.0773 0.10
## X28 0.1152 0.0194 0.0777 0.1026 0.1151 0.1280 0.15
## X1 0.0240 0.0186 -0.0111 0.0121 0.0233 0.0364 0.06
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 1.73 Min. : 22.9 Min. :18.1
## 1st Qu.: 6.18 1st Qu.: 7.64 1st Qu.: 38.3 1st Qu.:21.7
## Median : 9.32 Median : 9.77 Median : 44.4 Median :23.6
## Mean :10.08 Mean :10.12 Mean : 47.6 Mean :24.2
## 3rd Qu.:13.93 3rd Qu.:12.70 3rd Qu.: 53.5 3rd Qu.:26.1
## Max. :24.05 Max. :19.53 Max. :101.7 Max. :38.6
S29: HB Beta Top-n/10 All
S29: HB Beta Posterior — All Domain
## === Skenario 30: HB TopN RSE-NB ===
df_s30 <- run_on_segments(
seg_list = split(df_base, df_base$grup_jenks),
aux_fn = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ G1_RSE_Rendah ] y_logit | n = 62 | Vars: X8, X36_mean, X29_mean, X31_mean, X26
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.10307 0.01413 -2.13161 -2.11221 -2.10234 -2.09364 -2.08
## X8 0.16708 0.01846 0.13005 0.15555 0.16729 0.17986 0.20
## X36_mean -0.00662 0.01820 -0.04286 -0.02015 -0.00676 0.00590 0.03
## X29_mean -0.11807 0.01812 -0.15289 -0.12884 -0.11754 -0.10645 -0.09
## X31_mean -0.03707 0.01988 -0.07754 -0.04935 -0.03882 -0.02265 0.00
## X26 0.02059 0.01810 -0.01693 0.00942 0.02151 0.03125 0.06
## [ G2_RSE_Tinggi ] y_logit | n = 19 | Vars: X8
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -3.2210 0.0480 -3.3032 -3.2565 -3.2222 -3.1878 -3.12
## X8 0.5560 0.0441 0.4721 0.5255 0.5571 0.5840 0.65
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 1.13 Min. : 22.9 Min. : 5.88
## 1st Qu.: 6.18 1st Qu.: 6.13 1st Qu.: 38.3 1st Qu.: 7.60
## Median : 9.32 Median : 9.08 Median : 44.4 Median : 9.79
## Mean :10.08 Mean :10.09 Mean : 47.6 Mean :15.03
## 3rd Qu.:13.93 3rd Qu.:13.74 3rd Qu.: 53.5 3rd Qu.:14.67
## Max. :24.05 Max. :23.51 Max. :101.7 Max. :45.98
S30: HB Beta Top-n/10 RSE Natural Break
S30: HB Beta Posterior — RSE Natural Break
## === Skenario 31: HB TopN RSE-ES ===
df_s31 <- run_on_segments(
seg_list = split(df_base, df_base$grup_equal),
aux_fn = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ G1_RSE_Bawah ] y_logit | n = 41 | Vars: X19, X29_jumlah, X10_jumlah, X8
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -1.9517 0.0170 -1.9838 -1.9630 -1.9516 -1.9408 -1.92
## X19 -0.1901 0.0163 -0.2205 -0.2024 -0.1890 -0.1782 -0.16
## X29_jumlah -0.1243 0.0172 -0.1543 -0.1349 -0.1245 -0.1130 -0.09
## X10_jumlah -0.0765 0.0152 -0.1046 -0.0864 -0.0766 -0.0661 -0.05
## X8 0.0939 0.0157 0.0632 0.0837 0.0943 0.1033 0.13
## [ G2_RSE_Atas ] y_logit | n = 40 | Vars: X45, X8, X1, X40_jumlah
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.71930 0.01925 -2.75942 -2.73069 -2.71907 -2.70639 -2.68
## X45 -0.42047 0.02988 -0.47661 -0.44089 -0.41907 -0.40245 -0.36
## X8 -0.08794 0.02821 -0.14524 -0.10612 -0.08710 -0.06989 -0.03
## X1 -0.13468 0.02245 -0.17781 -0.15069 -0.13461 -0.11993 -0.09
## X40_jumlah 0.03389 0.02008 -0.00288 0.02102 0.03419 0.04592 0.07
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 2.06 Min. : 22.9 Min. : 6.84
## 1st Qu.: 6.18 1st Qu.: 6.43 1st Qu.: 38.3 1st Qu.: 9.47
## Median : 9.32 Median : 8.39 Median : 44.4 Median :17.74
## Mean :10.08 Mean :10.14 Mean : 47.6 Mean :16.48
## 3rd Qu.:13.93 3rd Qu.:13.51 3rd Qu.: 53.5 3rd Qu.:22.48
## Max. :24.05 Max. :23.65 Max. :101.7 Max. :29.64
S31: HB Beta Top-n/10 RSE Equal Size
S31: HB Beta Posterior — RSE Equal Size
## === Skenario 32: HB TopN Cluster ===
df_s32 <- run_on_segments(
seg_list = split(df_base, df_base$cluster_tn_logit),
aux_fn = seleksi_topn, model_fn = run_hbbeta, y_col = "y_logit", min_n = 5
)## [ Cluster_1 ] y_logit | n = 67 | Vars: X45, X25_jumlah, X11_mean, X26_mean, X43_mean, X10_jumlah
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -2.158891 0.016203 -2.189879 -2.169829 -2.158367 -2.149129 -2.12
## X45 -0.099301 0.017205 -0.131962 -0.110902 -0.099404 -0.088501 -0.07
## X25_jumlah -0.101848 0.020448 -0.139426 -0.116646 -0.101861 -0.088094 -0.06
## X11_mean 0.030707 0.014922 0.000532 0.021010 0.030359 0.038723 0.06
## X26_mean -0.087337 0.016424 -0.123431 -0.098104 -0.086175 -0.075856 -0.06
## X43_mean -0.061523 0.016941 -0.094442 -0.072310 -0.062899 -0.050619 -0.03
## X10_jumlah -0.100763 0.020370 -0.140027 -0.115026 -0.100517 -0.087260 -0.06
## [ Cluster_2 ] y_logit | n = 14 | Vars: X30
## Mean SD 2.5% 25% 50% 75% 97.5%
## intercept -3.1841 0.0697 -3.3371 -3.2301 -3.1788 -3.1343 -3.07
## X30 -0.4277 0.0703 -0.5566 -0.4743 -0.4268 -0.3808 -0.29
## Estimasi y_hb_pct RSE RSE_hb
## Min. : 0.08 Min. : 1.03 Min. : 22.9 Min. :15.3
## 1st Qu.: 6.18 1st Qu.: 6.51 1st Qu.: 38.3 1st Qu.:19.2
## Median : 9.32 Median : 9.54 Median : 44.4 Median :21.3
## Mean :10.08 Mean :10.10 Mean : 47.6 Mean :24.3
## 3rd Qu.:13.93 3rd Qu.:13.24 3rd Qu.: 53.5 3rd Qu.:25.2
## Max. :24.05 Max. :20.74 Max. :101.7 Max. :53.1
S32: HB Beta Top-n/10 Cluster-Aux
S32: HB Beta Posterior — Cluster Aux (A2-Logit)
# Standarisasi: tiap df hasil ditambah y_model & RSE_model
standardize_result <- function(df, y_col, rse_col, skenario_id) {
if (is.null(df)) return(NULL)
df %>%
mutate(
y_model = .data[[y_col]],
RSE_model = .data[[rse_col]],
Skenario = skenario_id
) %>%
dplyr::select(Kako, Estimasi, RSE, y_model, RSE_model, Skenario)
}
# Baseline direct
df_direct_eval <- df_base %>%
mutate(RSE_direct_eval = RSE_direct_col) %>%
dplyr::select(Kako, Estimasi, RSE, RSE_direct_eval)
# ── Tabel distribusi RSE ─────────────────────────────────────────────────
make_tabel_row <- function(df, rse_col, label) {
if (is.null(df) || !rse_col %in% names(df)) {
return(data.frame(Skenario = label, `RSE>=25` = NA, `RSE<25` = NA,
`%<25` = NA, `%<15` = NA, `CV Mean` = NA,
check.names = FALSE))
}
tabel_rse(df, rse_col, label)
}
eval_rows <- list(
make_tabel_row(df_base, "RSE_direct_col", "0. Direct"),
# EBLUP
make_tabel_row(df_s1, "RSE_eblup", "S01 EBLUP-BK All"),
make_tabel_row(df_s2, "RSE_eblup", "S02 EBLUP-BK RSE-NB"),
make_tabel_row(df_s3, "RSE_eblup", "S03 EBLUP-BK RSE-ES"),
make_tabel_row(df_s4, "RSE_eblup", "S04 EBLUP-BK Cluster"),
make_tabel_row(df_s5, "RSE_eblup", "S05 EBLUP-TN All"),
make_tabel_row(df_s6, "RSE_eblup", "S06 EBLUP-TN RSE-NB"),
make_tabel_row(df_s7, "RSE_eblup", "S07 EBLUP-TN RSE-ES"),
make_tabel_row(df_s8, "RSE_eblup", "S08 EBLUP-TN Cluster"),
# GLMM
make_tabel_row(df_s9, "RSE_glmm", "S09 GLMM-BK All"),
make_tabel_row(df_s10, "RSE_glmm", "S10 GLMM-BK RSE-NB"),
make_tabel_row(df_s11, "RSE_glmm", "S11 GLMM-BK RSE-ES"),
make_tabel_row(df_s12, "RSE_glmm", "S12 GLMM-BK Cluster"),
make_tabel_row(df_s13, "RSE_glmm", "S13 GLMM-TN All"),
make_tabel_row(df_s14, "RSE_glmm", "S14 GLMM-TN RSE-NB"),
make_tabel_row(df_s15, "RSE_glmm", "S15 GLMM-TN RSE-ES"),
make_tabel_row(df_s16, "RSE_glmm", "S16 GLMM-TN Cluster"),
# EB Beta
make_tabel_row(df_s17, "RSE_eb", "S17 EB-BK All"),
make_tabel_row(df_s18, "RSE_eb", "S18 EB-BK RSE-NB"),
make_tabel_row(df_s19, "RSE_eb", "S19 EB-BK RSE-ES"),
make_tabel_row(df_s20, "RSE_eb", "S20 EB-BK Cluster"),
make_tabel_row(df_s21, "RSE_eb", "S21 EB-TN All"),
make_tabel_row(df_s22, "RSE_eb", "S22 EB-TN RSE-NB"),
make_tabel_row(df_s23, "RSE_eb", "S23 EB-TN RSE-ES"),
make_tabel_row(df_s24, "RSE_eb", "S24 EB-TN Cluster"),
# HB Beta
make_tabel_row(df_s25, "RSE_hb", "S25 HB-BK All"),
make_tabel_row(df_s26, "RSE_hb", "S26 HB-BK RSE-NB"),
make_tabel_row(df_s27, "RSE_hb", "S27 HB-BK RSE-ES"),
make_tabel_row(df_s28, "RSE_hb", "S28 HB-BK Cluster"),
make_tabel_row(df_s29, "RSE_hb", "S29 HB-TN All"),
make_tabel_row(df_s30, "RSE_hb", "S30 HB-TN RSE-NB"),
make_tabel_row(df_s31, "RSE_hb", "S31 HB-TN RSE-ES"),
make_tabel_row(df_s32, "RSE_hb", "S32 HB-TN Cluster")
)
tabel_eval <- bind_rows(eval_rows)kable(tabel_eval,
caption = "Distribusi RSE — 32 Skenario + Direct",
col.names = c("Skenario", "RSE≥25", "RSE<25", "% <25%", "% <15%", "CV Mean")) %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, font_size = 12) %>%
row_spec(1, background = "#fff3cd") %>%
row_spec(
which(tabel_eval$`%<25` == max(tabel_eval$`%<25`, na.rm = TRUE))[-1],
bold = TRUE, background = "#d4edda"
) %>%
pack_rows("EBLUP", 2, 9) %>%
pack_rows("GLMM", 10, 17) %>%
pack_rows("EB Beta-Binomial", 18, 25) %>%
pack_rows("HB Beta-Binomial", 26, 33)| Skenario | RSE≥25 | RSE<25 | % <25% | % <15% | CV Mean |
|---|---|---|---|---|---|
|
80 | 1 | 1.2 | 0.0 | 47.56 |
| EBLUP | |||||
| S01 EBLUP-BK All | 66 | 15 | 18.5 | 0.0 | 32.57 |
| S02 EBLUP-BK RSE-NB | 26 | 55 | 67.9 | 35.8 | 27.37 |
| S03 EBLUP-BK RSE-ES | 48 | 33 | 40.7 | 0.0 | 31.20 |
| S04 EBLUP-BK Cluster | 66 | 15 | 18.5 | 0.0 | 33.65 |
| S05 EBLUP-TN All | 72 | 9 | 11.1 | 0.0 | 33.79 |
| S06 EBLUP-TN RSE-NB | 27 | 54 | 66.7 | 27.2 | 28.54 |
| S07 EBLUP-TN RSE-ES | 53 | 28 | 34.6 | 0.0 | 33.04 |
| S08 EBLUP-TN Cluster | 75 | 6 | 7.4 | 0.0 | 35.84 |
| GLMM | |||||
| S09 GLMM-BK All | 1 | 80 | 98.8 | 98.8 | 2.23 |
| S10 GLMM-BK RSE-NB | 2 | 79 | 97.5 | 93.8 | 3.49 |
| S11 GLMM-BK RSE-ES | 1 | 80 | 98.8 | 97.5 | 2.93 |
| S12 GLMM-BK Cluster | 3 | 78 | 96.3 | 93.8 | 5.83 |
| S13 GLMM-TN All | 1 | 80 | 98.8 | 97.5 | 5.50 |
| S14 GLMM-TN RSE-NB | 2 | 79 | 97.5 | 92.6 | 4.08 |
| S15 GLMM-TN RSE-ES | 1 | 80 | 98.8 | 98.8 | 4.21 |
| S16 GLMM-TN Cluster | 4 | 77 | 95.1 | 90.1 | 7.99 |
| EB Beta-Binomial | |||||
| S17 EB-BK All | 79 | 2 | 2.5 | 0.0 | 37.68 |
| S18 EB-BK RSE-NB | 75 | 6 | 7.4 | 0.0 | 36.29 |
| S19 EB-BK RSE-ES | 71 | 10 | 12.3 | 0.0 | 35.39 |
| S20 EB-BK Cluster | 79 | 2 | 2.5 | 0.0 | 36.36 |
| S21 EB-TN All | 79 | 2 | 2.5 | 0.0 | 37.66 |
| S22 EB-TN RSE-NB | 75 | 6 | 7.4 | 0.0 | 36.29 |
| S23 EB-TN RSE-ES | 71 | 10 | 12.3 | 0.0 | 35.33 |
| S24 EB-TN Cluster | 79 | 2 | 2.5 | 0.0 | 36.38 |
| HB Beta-Binomial | |||||
| S25 HB-BK All | 32 | 49 | 60.5 | 0.0 | 25.06 |
| S26 HB-BK RSE-NB | 19 | 62 | 76.5 | 76.5 | 14.24 |
| S27 HB-BK RSE-ES | 9 | 72 | 88.9 | 38.3 | 17.74 |
| S28 HB-BK Cluster | 23 | 58 | 71.6 | 0.0 | 24.45 |
| S29 HB-TN All | 28 | 53 | 65.4 | 0.0 | 24.21 |
| S30 HB-TN RSE-NB | 19 | 62 | 76.5 | 75.3 | 15.03 |
| S31 HB-TN RSE-ES | 9 | 72 | 88.9 | 46.9 | 16.48 |
| S32 HB-TN Cluster | 22 | 59 | 72.8 | 0.0 | 24.28 |
get_est <- function(df, col) if (!is.null(df) && col %in% names(df)) df[[col]] else NULL
metrik_rows <- list(
metrik(get_est(df_s1, "y_eblup"), df_s1$Estimasi, "S01 EBLUP-BK All"),
metrik(get_est(df_s2, "y_eblup"), df_s2$Estimasi, "S02 EBLUP-BK RSE-NB"),
metrik(get_est(df_s3, "y_eblup"), df_s3$Estimasi, "S03 EBLUP-BK RSE-ES"),
metrik(get_est(df_s4, "y_eblup"), df_s4$Estimasi, "S04 EBLUP-BK Cluster"),
metrik(get_est(df_s5, "y_eblup"), df_s5$Estimasi, "S05 EBLUP-TN All"),
metrik(get_est(df_s6, "y_eblup"), df_s6$Estimasi, "S06 EBLUP-TN RSE-NB"),
metrik(get_est(df_s7, "y_eblup"), df_s7$Estimasi, "S07 EBLUP-TN RSE-ES"),
metrik(get_est(df_s8, "y_eblup"), df_s8$Estimasi, "S08 EBLUP-TN Cluster"),
metrik(get_est(df_s9, "y_glmm"), df_s9$Estimasi, "S09 GLMM-BK All"),
metrik(get_est(df_s10, "y_glmm"), df_s10$Estimasi, "S10 GLMM-BK RSE-NB"),
metrik(get_est(df_s11, "y_glmm"), df_s11$Estimasi, "S11 GLMM-BK RSE-ES"),
metrik(get_est(df_s12, "y_glmm"), df_s12$Estimasi, "S12 GLMM-BK Cluster"),
metrik(get_est(df_s13, "y_glmm"), df_s13$Estimasi, "S13 GLMM-TN All"),
metrik(get_est(df_s14, "y_glmm"), df_s14$Estimasi, "S14 GLMM-TN RSE-NB"),
metrik(get_est(df_s15, "y_glmm"), df_s15$Estimasi, "S15 GLMM-TN RSE-ES"),
metrik(get_est(df_s16, "y_glmm"), df_s16$Estimasi, "S16 GLMM-TN Cluster"),
metrik(get_est(df_s17, "y_eb_pct"), df_s17$Estimasi, "S17 EB-BK All"),
metrik(get_est(df_s18, "y_eb_pct"), df_s18$Estimasi, "S18 EB-BK RSE-NB"),
metrik(get_est(df_s19, "y_eb_pct"), df_s19$Estimasi, "S19 EB-BK RSE-ES"),
metrik(get_est(df_s20, "y_eb_pct"), df_s20$Estimasi, "S20 EB-BK Cluster"),
metrik(get_est(df_s21, "y_eb_pct"), df_s21$Estimasi, "S21 EB-TN All"),
metrik(get_est(df_s22, "y_eb_pct"), df_s22$Estimasi, "S22 EB-TN RSE-NB"),
metrik(get_est(df_s23, "y_eb_pct"), df_s23$Estimasi, "S23 EB-TN RSE-ES"),
metrik(get_est(df_s24, "y_eb_pct"), df_s24$Estimasi, "S24 EB-TN Cluster"),
metrik(get_est(df_s25, "y_hb_pct"), df_s25$Estimasi, "S25 HB-BK All"),
metrik(get_est(df_s26, "y_hb_pct"), df_s26$Estimasi, "S26 HB-BK RSE-NB"),
metrik(get_est(df_s27, "y_hb_pct"), df_s27$Estimasi, "S27 HB-BK RSE-ES"),
metrik(get_est(df_s28, "y_hb_pct"), df_s28$Estimasi, "S28 HB-BK Cluster"),
metrik(get_est(df_s29, "y_hb_pct"), df_s29$Estimasi, "S29 HB-TN All"),
metrik(get_est(df_s30, "y_hb_pct"), df_s30$Estimasi, "S30 HB-TN RSE-NB"),
metrik(get_est(df_s31, "y_hb_pct"), df_s31$Estimasi, "S31 HB-TN RSE-ES"),
metrik(get_est(df_s32, "y_hb_pct"), df_s32$Estimasi, "S32 HB-TN Cluster")
)
tabel_metrik <- bind_rows(Filter(Negate(is.null), metrik_rows))
kable(tabel_metrik,
caption = "Relative Bias (%) & RMSE terhadap Direct Estimator") %>%
kable_styling(bootstrap_options = c("striped","hover","condensed","bordered"),
full_width = FALSE, font_size = 12) %>%
row_spec(which(abs(tabel_metrik$`RB (%)`) == min(abs(tabel_metrik$`RB (%)`), na.rm=TRUE)),
bold = TRUE, background = "#d4edda")| Skenario | RB (%) | RMSE |
|---|---|---|
| S01 EBLUP-BK All | -6.336 | 3.8559 |
| S02 EBLUP-BK RSE-NB | -11.100 | 4.1048 |
| S03 EBLUP-BK RSE-ES | -9.116 | 3.2683 |
| S04 EBLUP-BK Cluster | -6.150 | 3.6493 |
| S05 EBLUP-TN All | -5.870 | 3.7863 |
| S06 EBLUP-TN RSE-NB | -10.974 | 4.0329 |
| S07 EBLUP-TN RSE-ES | -8.461 | 3.1051 |
| S08 EBLUP-TN Cluster | -6.284 | 3.4471 |
| S09 GLMM-BK All | 11.118 | 2.4846 |
| S10 GLMM-BK RSE-NB | 4.191 | 3.6762 |
| S11 GLMM-BK RSE-ES | 6.220 | 2.8721 |
| S12 GLMM-BK Cluster | 9.401 | 2.9225 |
| S13 GLMM-TN All | 11.320 | 2.5729 |
| S14 GLMM-TN RSE-NB | 4.400 | 3.7664 |
| S15 GLMM-TN RSE-ES | 7.038 | 3.0583 |
| S16 GLMM-TN Cluster | 10.842 | 3.2166 |
| S17 EB-BK All | 2.070 | 1.9740 |
| S18 EB-BK RSE-NB | 0.581 | 2.0928 |
| S19 EB-BK RSE-ES | 0.754 | 1.7348 |
| S20 EB-BK Cluster | 2.910 | 2.0358 |
| S21 EB-TN All | 2.162 | 1.9689 |
| S22 EB-TN RSE-NB | 0.586 | 2.0865 |
| S23 EB-TN RSE-ES | 0.903 | 1.7279 |
| S24 EB-TN Cluster | 3.135 | 2.1364 |
| S25 HB-BK All | 64.926 | 2.3195 |
| S26 HB-BK RSE-NB | 23.497 | 0.6285 |
| S27 HB-BK RSE-ES | 71.816 | 1.4940 |
| S28 HB-BK Cluster | 24.821 | 1.7359 |
| S29 HB-TN All | 81.880 | 2.3613 |
| S30 HB-TN RSE-NB | 23.608 | 0.6426 |
| S31 HB-TN RSE-ES | 72.073 | 1.3614 |
| S32 HB-TN Cluster | 32.512 | 1.5165 |
direct_pct <- pct_ok(df_base$RSE_direct_col)
plot_df <- tabel_eval %>%
filter(Skenario != "0. Direct") %>%
mutate(
pct_25 = `% < 25%`, # << nama persis dari debug
Model = case_when(
grepl("EBLUP", Skenario) ~ "EBLUP",
grepl("GLMM", Skenario) ~ "GLMM",
grepl("EB-BK|EB-TN", Skenario) ~ "EB Beta",
grepl("HB-BK|HB-TN", Skenario) ~ "HB Beta",
TRUE ~ "Lain"
),
AuxMetode = ifelse(grepl("-BK", Skenario), "Backward", "Top-n/10"),
Skenario = factor(Skenario, levels = rev(unique(Skenario)))
)
ggplot(plot_df, aes(x = Skenario, y = pct_25, fill = Model, alpha = AuxMetode)) +
geom_col(width = 0.72) +
geom_text(aes(label = paste0(pct_25, "%")),
hjust = -0.12, size = 3, fontface = "bold") +
geom_vline(xintercept = direct_pct,
linetype = "dashed", color = "#c0392b", linewidth = 0.9) +
scale_fill_manual(values = c(
"EBLUP" = "#2980b9", "GLMM" = "#8e44ad",
"EB Beta" = "#e67e22", "HB Beta" = "#27ae60"
)) +
scale_alpha_manual(values = c("Backward" = 1, "Top-n/10" = 0.65)) +
scale_y_continuous(limits = c(0, 115), expand = c(0, 0)) +
coord_flip() +
labs(title = "% Domain RSE < 25% — 32 Skenario SAE",
subtitle = "Garis merah = baseline Direct | Gelap = Backward, Terang = Top-n/10",
x = NULL, y = "% Domain RSE < 25%", fill = "Model", alpha = "Seleksi Aux") +
theme_minimal(base_size = 11) +
theme(legend.position = "top", plot.title = element_text(face = "bold", size = 13),
panel.grid.major.y = element_blank())make_scatter_df <- function(df, y_col, rse_col, label) {
if (is.null(df) || !all(c(y_col, rse_col, "RSE_direct") %in% names(df))) return(NULL)
df %>%
mutate(RSE_model = .data[[rse_col]], Skenario = label) %>%
dplyr::select(Kako, RSE_direct, RSE_model, Skenario) # << RSE_direct bukan RSE_direct_col
}
scatter_list <- list(
make_scatter_df(df_s1, "y_eblup", "RSE_eblup", "S01 EBLUP-BK All"),
make_scatter_df(df_s2, "y_eblup", "RSE_eblup", "S02 EBLUP-BK RSE-NB"),
make_scatter_df(df_s3, "y_eblup", "RSE_eblup", "S03 EBLUP-BK RSE-ES"),
make_scatter_df(df_s4, "y_eblup", "RSE_eblup", "S04 EBLUP-BK Clust"),
make_scatter_df(df_s5, "y_eblup", "RSE_eblup", "S05 EBLUP-TN All"),
make_scatter_df(df_s6, "y_eblup", "RSE_eblup", "S06 EBLUP-TN RSE-NB"),
make_scatter_df(df_s7, "y_eblup", "RSE_eblup", "S07 EBLUP-TN RSE-ES"),
make_scatter_df(df_s8, "y_eblup", "RSE_eblup", "S08 EBLUP-TN Clust"),
make_scatter_df(df_s9, "y_glmm", "RSE_glmm", "S09 GLMM-BK All"),
make_scatter_df(df_s10, "y_glmm", "RSE_glmm", "S10 GLMM-BK RSE-NB"),
make_scatter_df(df_s11, "y_glmm", "RSE_glmm", "S11 GLMM-BK RSE-ES"),
make_scatter_df(df_s12, "y_glmm", "RSE_glmm", "S12 GLMM-BK Clust"),
make_scatter_df(df_s13, "y_glmm", "RSE_glmm", "S13 GLMM-TN All"),
make_scatter_df(df_s14, "y_glmm", "RSE_glmm", "S14 GLMM-TN RSE-NB"),
make_scatter_df(df_s15, "y_glmm", "RSE_glmm", "S15 GLMM-TN RSE-ES"),
make_scatter_df(df_s16, "y_glmm", "RSE_glmm", "S16 GLMM-TN Clust"),
make_scatter_df(df_s17, "y_eb_pct", "RSE_eb", "S17 EB-BK All"),
make_scatter_df(df_s18, "y_eb_pct", "RSE_eb", "S18 EB-BK RSE-NB"),
make_scatter_df(df_s19, "y_eb_pct", "RSE_eb", "S19 EB-BK RSE-ES"),
make_scatter_df(df_s20, "y_eb_pct", "RSE_eb", "S20 EB-BK Clust"),
make_scatter_df(df_s21, "y_eb_pct", "RSE_eb", "S21 EB-TN All"),
make_scatter_df(df_s22, "y_eb_pct", "RSE_eb", "S22 EB-TN RSE-NB"),
make_scatter_df(df_s23, "y_eb_pct", "RSE_eb", "S23 EB-TN RSE-ES"),
make_scatter_df(df_s24, "y_eb_pct", "RSE_eb", "S24 EB-TN Clust"),
make_scatter_df(df_s25, "y_hb_pct", "RSE_hb", "S25 HB-BK All"),
make_scatter_df(df_s26, "y_hb_pct", "RSE_hb", "S26 HB-BK RSE-NB"),
make_scatter_df(df_s27, "y_hb_pct", "RSE_hb", "S27 HB-BK RSE-ES"),
make_scatter_df(df_s28, "y_hb_pct", "RSE_hb", "S28 HB-BK Clust"),
make_scatter_df(df_s29, "y_hb_pct", "RSE_hb", "S29 HB-TN All"),
make_scatter_df(df_s30, "y_hb_pct", "RSE_hb", "S30 HB-TN RSE-NB"),
make_scatter_df(df_s31, "y_hb_pct", "RSE_hb", "S31 HB-TN RSE-ES"),
make_scatter_df(df_s32, "y_hb_pct", "RSE_hb", "S32 HB-TN Clust")
)
df_scatter <- bind_rows(Filter(Negate(is.null), scatter_list)) %>%
filter(!is.na(RSE_direct) & !is.na(RSE_model))
lim_max <- quantile(c(df_scatter$RSE_direct, df_scatter$RSE_model), 0.99, na.rm = TRUE)
ggplot(df_scatter, aes(x = RSE_direct, y = RSE_model)) +
geom_point(alpha = 0.5, size = 1.3, color = "#2980b9") +
geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "#c0392b") +
geom_hline(yintercept = 25, linetype = "dotted", color = "#e67e22") +
geom_vline(xintercept = 25, linetype = "dotted", color = "#e67e22") +
facet_wrap(~ Skenario, ncol = 4, scales = "free") +
coord_cartesian(xlim = c(0, lim_max), ylim = c(0, lim_max)) +
labs(title = "RSE Direct vs RSE Model — 32 Skenario SAE",
subtitle = "Di bawah diagonal merah = lebih presisi dari direct",
x = "RSE Direct (%)", y = "RSE Model (%)") +
theme_minimal(base_size = 9) +
theme(strip.text = element_text(face = "bold", size = 7),
plot.title = element_text(face = "bold"))heat_df <- tabel_eval %>%
filter(Skenario != "0. Direct") %>%
mutate(
pct_25 = `% < 25%`, # << nama persis
Model = case_when(
grepl("EBLUP", Skenario) ~ "EBLUP",
grepl("GLMM", Skenario) ~ "GLMM",
grepl("EB-BK|EB-TN", Skenario) ~ "EB-Beta",
TRUE ~ "HB-Beta"
),
AuxMetode = ifelse(grepl("-BK", Skenario), "Backward", "Top-n/10"),
Partisi = case_when(
grepl("All", Skenario) ~ "All",
grepl("RSE-NB", Skenario) ~ "RSE-NB",
grepl("RSE-ES", Skenario) ~ "RSE-ES",
TRUE ~ "Cluster"
),
Partisi = factor(Partisi, levels = c("All","RSE-NB","RSE-ES","Cluster")),
Model = factor(Model, levels = c("EBLUP","GLMM","EB-Beta","HB-Beta")),
AuxMetode = factor(AuxMetode, levels = c("Backward","Top-n/10"))
)
ggplot(heat_df, aes(x = Partisi, y = Model, fill = pct_25)) +
geom_tile(color = "white", linewidth = 0.8) +
geom_text(aes(label = paste0(pct_25, "%")), size = 4, fontface = "bold") +
facet_wrap(~ AuxMetode, ncol = 2) +
scale_fill_gradient2(low = "#e74c3c", mid = "#f39c12", high = "#27ae60",
midpoint = 60, name = "% Domain\nRSE < 25%") +
labs(title = "Heatmap Performa — % Domain RSE < 25%",
subtitle = "Kiri = Backward | Kanan = Top-n/10",
x = "Partisi", y = "Model") +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold"),
strip.text = element_text(face = "bold", size = 12),
axis.text = element_text(size = 11))best_rse <- tabel_eval %>%
filter(Skenario != "0. Direct") %>%
mutate(pct_25 = `% < 25%`) %>% # << tarik dulu
slice_max(pct_25, n = 3, with_ties = FALSE)
best_rb <- tabel_metrik %>%
slice_min(abs(`RB (%)`), n = 3, with_ties = FALSE)
cat("=== Top 3 Model (% domain RSE < 25%) ===\n")## === Top 3 Model (% domain RSE < 25%) ===
## Skenario pct_25 % < 15% CV Mean
## 1 S09 GLMM-BK All 98.8 98.8 2.23
## 2 S11 GLMM-BK RSE-ES 98.8 97.5 2.93
## 3 S13 GLMM-TN All 98.8 97.5 5.50
##
## === Top 3 Model (Relative Bias terkecil) ===
## Skenario RB (%) RMSE
## 1 S18 EB-BK RSE-NB 0.581 2.093
## 2 S22 EB-TN RSE-NB 0.586 2.087
## 3 S19 EB-BK RSE-ES 0.754 1.735
Panduan interpretasi 32 skenario:
Seleksi Aux (Backward vs Top-n/10): Backward
lebih selektif (stepAIC membuang variabel non-signifikan); Top-n/10
mempertahankan variabel dengan korelasi tertinggi. Keduanya menggunakan
floor(n_partisi/10) variabel awal dari pre-filter
korelasi.
EBLUP (Normal): Baseline kuat untuk domain tidak terlalu ekstrim proporsinya. Rentan estimasi negatif jika proporsi mendekati 0.
GLMM (Logit, glmmTMB): Menangani bounded response [0,1] lebih proper. Random intercept per domain memberi shrinkage empiris. MSE dari Jackknife leave-one-domain-out.
EB Beta-Binomial: Analytical — cepat dan tidak perlu MCMC. Prior mean domain-spesifik dari regresi logit. Posterior variance analytik dari distribusi Beta konjugat.
HB Beta (saeHB/JAGS): Paling proper secara Bayesian. Memanfaatkan full posterior distribution. Plot posterior dengan 95% CI memperlihatkan ketidakpastian per domain.
Partisi RSE-NB vs RSE-ES: Natural Break (Jenks) memisahkan berdasarkan gap alami distribusi RSE; Equal Size memastikan kedua grup seimbang. RSE-NB lebih heterogen antar grup, RSE-ES lebih stabil dalam estimasi.
Cluster-Aux: Variabel clustering berbeda per kombinasi (Aux method, Link function), membuat cluster lebih relevan secara statistik untuk masing-masing model.
Dianalisis dengan R. Seluruh 32 skenario merupakan kombinasi dari 2 metode seleksi aux × 4 model × 4 partisi.