Pendahuluan

Dokumen ini menjelaskan langkah-langkah analisis Small Area Estimation (SAE) menggunakan model Fay-Herriot (FH) untuk dua wilayah: Sarbagita (kode 51) dan Banjarbakula (kode 63). Analisis ini bertujuan menghasilkan estimasi parameter pada level kecamatan dengan memanfaatkan data survei dan data auxiliary.

1. Persiapan dan Import Library

Langkah pertama adalah memuat library yang diperlukan untuk analisis statistik, manipulasi data, visualisasi, dan SAE.

library(emdi)
library(dplyr)
library(ggplot2)
library(car)
library(lmtest)
library(MASS)
library(readxl)
library(writexl)
library(reshape2)
library(moments) # untuk menghitung skewness

2. Load dan Validasi Data

Data yang digunakan terdiri dari: - Auxiliary Data: data/kecamatan_fe.xlsx - Survey Variables: data/kecamatan_surveyvars_svy.xlsx - Direct Estimate: data/direct_estimate.xlsx

aux_data <- read_xlsx("../data/kecamatan_fe.xlsx")
surveyvars_data <- read_xlsx("../data/kecamatan_surveyvars_svy.xlsx")
direct_data <- read_xlsx("../data/direct_estimate.xlsx")

2a. Cek dan Transformasi Skewness Variabel Prediktor

Sebelum pemodelan, distribusi variabel prediktor dicek kemencengannya (skewness). Jika skewness absolut > 1 dan semua nilai >= 0, dilakukan transformasi log1p.

vars_kandidat <- c(
  "jumlah_bts_per_desa", "proporsi_desa_pertanian", "proporsi_desa_non_pertanian",
  "proporsi_lembaga_keuangan", "proporsi_koperasi", "proporsi_sarana_ekonomi",
  "proporsi_desa_fasilitas_kredit", "proporsi_sarana_kesehatan", "proporsi_sarana_keterampilan",
  "proporsi_keluarga_blt", "proporsi_keluarga_padat_karya", "proporsi_keluarga_kumuh",
  "proporsi_desa_air_minum_layak", "proporsi_desa_sanitasi_layak", "proporsi_desa_jalan_darat",
  "proporsi_desa_sinyal_kuat", "proporsi_desa_angkutan_umum", "proporsi_sd", "proporsi_smp",
  "proporsi_sma", "proporsi_pt", "proporsi_industri_mikro_kecil", "proporsi_sktm",
  "proporsi_kk_bekerja", "Dependency_Ratio", "Proporsi_Keluarga_Usaha_Pertanian",
  "Proporsi_KK_Bekerja", "Sex_Ratio", "proporsi_tenaga_medis"
)

# Untuk Sarbagita (51)
aux_data_51 <- aux_data[substr(aux_data$id_kec, 1, 2) == "51", ]
skewness_info_51 <- sapply(vars_kandidat, function(v) {
  x <- aux_data_51[[v]]
  if (is.numeric(x) && !all(is.na(x))) {
    moments::skewness(x, na.rm = TRUE)
  } else {
    NA
  }
})
vars_to_log_51 <- names(skewness_info_51)[which(abs(skewness_info_51) > 1)]
for (v in vars_to_log_51) {
  if (all(aux_data_51[[v]] >= 0, na.rm = TRUE)) {
    aux_data_51[[paste0(v, "_log")]] <- log1p(aux_data_51[[v]])
  }
}
skew_table_51 <- data.frame(
  Variabel = names(skewness_info_51),
  Skewness = as.numeric(skewness_info_51),
  Transformasi_Log1p = ifelse(names(skewness_info_51) %in% vars_to_log_51, "YA", "TIDAK")
)
print(skew_table_51, row.names = FALSE)
##                           Variabel   Skewness Transformasi_Log1p
##                jumlah_bts_per_desa  3.7274496                 YA
##            proporsi_desa_pertanian -0.6136942              TIDAK
##        proporsi_desa_non_pertanian  0.6136942              TIDAK
##          proporsi_lembaga_keuangan  2.4682109                 YA
##                  proporsi_koperasi  1.5647333                 YA
##            proporsi_sarana_ekonomi  2.7310744                 YA
##     proporsi_desa_fasilitas_kredit -3.3053812                 YA
##          proporsi_sarana_kesehatan  2.3556796                 YA
##       proporsi_sarana_keterampilan  2.2942794                 YA
##              proporsi_keluarga_blt  0.5311563              TIDAK
##      proporsi_keluarga_padat_karya  2.9762248                 YA
##            proporsi_keluarga_kumuh  4.1149136                 YA
##      proporsi_desa_air_minum_layak -2.3377239                 YA
##       proporsi_desa_sanitasi_layak  0.2734514              TIDAK
##          proporsi_desa_jalan_darat        NaN              TIDAK
##          proporsi_desa_sinyal_kuat -2.8833182                 YA
##        proporsi_desa_angkutan_umum -1.6031779                 YA
##                        proporsi_sd  1.0407611                 YA
##                       proporsi_smp  1.4769412                 YA
##                       proporsi_sma  1.0014968                 YA
##                        proporsi_pt  2.8042233                 YA
##      proporsi_industri_mikro_kecil  2.5168872                 YA
##                      proporsi_sktm  2.4698791                 YA
##                proporsi_kk_bekerja         NA              TIDAK
##                   Dependency_Ratio         NA              TIDAK
##  Proporsi_Keluarga_Usaha_Pertanian         NA              TIDAK
##                Proporsi_KK_Bekerja         NA              TIDAK
##                          Sex_Ratio         NA              TIDAK
##              proporsi_tenaga_medis  0.6664919              TIDAK
vars_sarbagita <- sapply(vars_kandidat, function(v) {
  if (v %in% vars_to_log_51 && all(aux_data_51[[v]] >= 0, na.rm = TRUE)) {
    paste0(v, "_log")
  } else {
    v
  }
}, USE.NAMES = FALSE)

# Untuk Banjarbakula (63)
aux_data_63 <- aux_data[substr(aux_data$id_kec, 1, 2) == "63", ]
skewness_info_63 <- sapply(vars_kandidat, function(v) {
  x <- aux_data_63[[v]]
  if (is.numeric(x) && !all(is.na(x))) {
    moments::skewness(x, na.rm = TRUE)
  } else {
    NA
  }
})
vars_to_log_63 <- names(skewness_info_63)[which(abs(skewness_info_63) > 1)]
for (v in vars_to_log_63) {
  if (all(aux_data_63[[v]] >= 0, na.rm = TRUE)) {
    aux_data_63[[paste0(v, "_log")]] <- log1p(aux_data_63[[v]])
  }
}
skew_table_63 <- data.frame(
  Variabel = names(skewness_info_63),
  Skewness = as.numeric(skewness_info_63),
  Transformasi_Log1p = ifelse(names(skewness_info_63) %in% vars_to_log_63, "YA", "TIDAK")
)
print(skew_table_63, row.names = FALSE)
##                           Variabel   Skewness Transformasi_Log1p
##                jumlah_bts_per_desa  2.4946949                 YA
##            proporsi_desa_pertanian -1.5768372                 YA
##        proporsi_desa_non_pertanian  1.5768372                 YA
##          proporsi_lembaga_keuangan  5.5146536                 YA
##                  proporsi_koperasi  3.9903342                 YA
##            proporsi_sarana_ekonomi  3.3676629                 YA
##     proporsi_desa_fasilitas_kredit -1.6955045                 YA
##          proporsi_sarana_kesehatan  3.4704144                 YA
##       proporsi_sarana_keterampilan  5.6192002                 YA
##              proporsi_keluarga_blt  1.8872717                 YA
##      proporsi_keluarga_padat_karya  1.7705376                 YA
##            proporsi_keluarga_kumuh  4.5905992                 YA
##      proporsi_desa_air_minum_layak -0.3272712              TIDAK
##       proporsi_desa_sanitasi_layak -0.9364448              TIDAK
##          proporsi_desa_jalan_darat -3.8694692                 YA
##          proporsi_desa_sinyal_kuat -1.6937043                 YA
##        proporsi_desa_angkutan_umum -1.0254434                 YA
##                        proporsi_sd  0.3621029              TIDAK
##                       proporsi_smp  1.0956467                 YA
##                       proporsi_sma  1.4472709                 YA
##                        proporsi_pt  3.4413063                 YA
##      proporsi_industri_mikro_kecil  4.5083357                 YA
##                      proporsi_sktm  2.8504081                 YA
##                proporsi_kk_bekerja         NA              TIDAK
##                   Dependency_Ratio         NA              TIDAK
##  Proporsi_Keluarga_Usaha_Pertanian         NA              TIDAK
##                Proporsi_KK_Bekerja         NA              TIDAK
##                          Sex_Ratio         NA              TIDAK
##              proporsi_tenaga_medis  0.8656454              TIDAK
vars_banjarbakula <- vars_sarbagita

3. Validasi Kolom Kunci dan Penyesuaian Domain

Domain (id_kec) disamakan agar data konsisten antar sumber.

stopifnot("id_kec" %in% names(surveyvars_data))
stopifnot(all(c("id_kec", "direct_estimate", "direct_cv") %in% names(direct_data)))
direct_data <- direct_data %>%
  mutate(direct_var = (direct_cv/100 * direct_estimate)^2)

aux_data$id_kec <- as.character(aux_data$id_kec)
direct_data$id_kec <- as.character(direct_data$id_kec)
common_domains <- intersect(aux_data$id_kec, direct_data$id_kec)
aux_data <- aux_data[aux_data$id_kec %in% common_domains, ]
direct_data <- direct_data[direct_data$id_kec %in% common_domains, ]

surveyvars_data$id_kec <- as.character(surveyvars_data$id_kec)
surveyvars_data <- surveyvars_data[surveyvars_data$id_kec %in% direct_data$id_kec, ]
direct_data <- left_join(direct_data, surveyvars_data, by = "id_kec")

4. Seleksi dan Analisis Multikolinearitas Variabel Kandidat

Variabel prediktor yang sangat berkorelasi dihapus untuk menghindari multikolinearitas.

remove_highly_correlated <- function(data, vars, threshold = 0.8) {
  aux_vars <- vars[sapply(vars, function(v) { x <- data[[v]]; !all(is.na(x)) && sd(x, na.rm = TRUE) > 0 })]
  datamat <- data[, aux_vars, drop=FALSE]
  to_remove <- c()
  repeat {
    cor_mat <- cor(datamat, use = "pairwise.complete.obs")
    cor_mat[lower.tri(cor_mat, diag = TRUE)] <- NA
    max_cor <- max(abs(cor_mat), na.rm = TRUE)
    if (is.na(max_cor) || max_cor <= threshold) break
    idx <- which(abs(cor_mat) == max_cor, arr.ind = TRUE)[1, ]
    v1 <- rownames(cor_mat)[idx[1]]
    v2 <- colnames(cor_mat)[idx[2]]
    var1 <- var(datamat[[v1]], na.rm = TRUE)
    var2 <- var(datamat[[v2]], na.rm = TRUE)
    if (var1 >= var2) {
      to_remove <- c(to_remove, v2)
      datamat[[v2]] <- NULL
    } else {
      to_remove <- c(to_remove, v1)
      datamat[[v1]] <- NULL
    }
  }
  final_vars <- setdiff(aux_vars, to_remove)
  return(final_vars)
}

5. Pemodelan Fay-Herriot

Model FH dibangun untuk masing-masing wilayah, baik tanpa maupun dengan transformasi log. Feature selection dilakukan otomatis.

run_sae_workflow_nolog <- function(aux_data, direct_data, vars_awal, prefix_file) {
  aux_sub <- aux_data[substr(aux_data$id_kec, 1, 2) == prefix_file, ]
  direct_sub <- direct_data[substr(direct_data$id_kec, 1, 2) == prefix_file, ]
  candidate_vars_sub <- left_join(aux_sub, direct_sub, by = "id_kec", suffix = c("_aux", "_direct"))
  vars_final <- remove_highly_correlated(candidate_vars_sub, vars_awal)
  combined_data_sub <- emdi::combine_data(
    pop_data = aux_sub, pop_domains = "id_kec",
    smp_data = direct_sub, smp_domains = "id_kec"
  )
  assign("combined_data_sub", combined_data_sub, envir = .GlobalEnv)
  fh_model <- fh(
    fixed = as.formula(paste("direct_estimate ~", paste(vars_final, collapse = " + ")) ),
    vardir = "direct_var",
    combined_data = combined_data_sub,
    domains = "id_kec",
    MSE = TRUE,
    transformation = "no",
    backtransformation = NULL,
    method = "ml",
    interval = c(0, 1e20)
  )
  step_fh <- emdi::step(fh_model, criterion = "AICc", direction = "both")
  return(step_fh)
}

fh_sarbagita_nolog <- run_sae_workflow_nolog(aux_data, direct_data, vars_sarbagita, "51")
## Warning in fh(fixed = as.formula(paste("direct_estimate ~", paste(vars_final, :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Start: AIC = 621.1
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Dependency_Ratio + Proporsi_KK_Bekerja + Sex_Ratio + proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## Dependency_Ratio + : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                           df    AIC
## - Dependency_Ratio         1 619.13
## - Proporsi_KK_Bekerja      1 619.17
## - proporsi_tenaga_medis    1 619.83
## - proporsi_keluarga_blt    1 620.72
## <none>                       621.10
## - Sex_Ratio                1 627.03
## - proporsi_desa_pertanian  1 629.37
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 619.13
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Proporsi_KK_Bekerja + Sex_Ratio + proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## Proporsi_KK_Bekerja + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                           df    AIC
## - Proporsi_KK_Bekerja      1 617.23
## - proporsi_tenaga_medis    1 618.24
## - proporsi_keluarga_blt    1 618.87
## <none>                       619.13
## + Dependency_Ratio         1 621.10
## - Sex_Ratio                1 625.03
## - proporsi_desa_pertanian  1 627.44
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 617.23
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Sex_Ratio + proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio + :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                           df    AIC
## - proporsi_tenaga_medis    1 616.48
## - proporsi_keluarga_blt    1 616.95
## <none>                       617.23
## + Proporsi_KK_Bekerja      1 619.13
## + Dependency_Ratio         1 619.17
## - Sex_Ratio                1 623.35
## - proporsi_desa_pertanian  1 625.44
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 616.48
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Sex_Ratio
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio, :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                           df    AIC
## - proporsi_keluarga_blt    1 615.98
## <none>                       616.48
## + proporsi_tenaga_medis    1 617.23
## + Dependency_Ratio         1 617.89
## + Proporsi_KK_Bekerja      1 618.24
## - Sex_Ratio                1 622.49
## - proporsi_desa_pertanian  1 624.79
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio, :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## 
## Step: AIC = 615.98
## direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio + :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio + :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio + :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio + :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
##                           df    AIC
## <none>                       615.98
## + proporsi_keluarga_blt    1 616.48
## + proporsi_tenaga_medis    1 616.95
## + Dependency_Ratio         1 617.19
## + Proporsi_KK_Bekerja      1 617.78
## - Sex_Ratio                1 620.52
## - proporsi_desa_pertanian  1 638.17
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio, :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
fh_banjarbakula_nolog <- run_sae_workflow_nolog(aux_data, direct_data, vars_banjarbakula, "63")
## Warning in fh(fixed = as.formula(paste("direct_estimate ~", paste(vars_final, :
## The estimate of the variance of the random effects falls at the interval limit.
## It is recommended to choose a larger interval for the estimation of the
## variance of the random effects (specify interval input argument).
## Start: AIC = 1197.84
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + proporsi_desa_jalan_darat + 
##     Dependency_Ratio + Proporsi_KK_Bekerja + Sex_Ratio + proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_keluarga_blt +
## proporsi_desa_sanitasi_layak + : The estimate of the variance of the random
## effects falls at the interval limit. It is recommended to choose a larger
## interval for the estimation of the variance of the random effects (specify
## interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_desa_sanitasi_layak + : The estimate of the variance of the random
## effects falls at the interval limit. It is recommended to choose a larger
## interval for the estimation of the variance of the random effects (specify
## interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - proporsi_desa_jalan_darat     1 1195.8
## - Dependency_Ratio              1 1195.9
## - Proporsi_KK_Bekerja           1 1196.0
## - proporsi_desa_sanitasi_layak  1 1196.1
## - Sex_Ratio                     1 1196.7
## - proporsi_tenaga_medis         1 1196.8
## <none>                            1197.8
## - proporsi_keluarga_blt         1 1205.7
## - proporsi_desa_pertanian       1 1210.2
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 1195.84
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + Dependency_Ratio + Proporsi_KK_Bekerja + 
##     Sex_Ratio + proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_keluarga_blt +
## proporsi_desa_sanitasi_layak + : The estimate of the variance of the random
## effects falls at the interval limit. It is recommended to choose a larger
## interval for the estimation of the variance of the random effects (specify
## interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_desa_sanitasi_layak + : The estimate of the variance of the random
## effects falls at the interval limit. It is recommended to choose a larger
## interval for the estimation of the variance of the random effects (specify
## interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - Dependency_Ratio              1 1193.9
## - proporsi_desa_sanitasi_layak  1 1194.1
## - Proporsi_KK_Bekerja           1 1194.2
## - Sex_Ratio                     1 1194.7
## - proporsi_tenaga_medis         1 1194.8
## <none>                            1195.8
## + proporsi_desa_jalan_darat     1 1197.8
## - proporsi_desa_pertanian       1 1209.5
## - proporsi_keluarga_blt         1 1210.1
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 1193.89
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + Proporsi_KK_Bekerja + Sex_Ratio + 
##     proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_keluarga_blt +
## proporsi_desa_sanitasi_layak + : The estimate of the variance of the random
## effects falls at the interval limit. It is recommended to choose a larger
## interval for the estimation of the variance of the random effects (specify
## interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_desa_sanitasi_layak + : The estimate of the variance of the random
## effects falls at the interval limit. It is recommended to choose a larger
## interval for the estimation of the variance of the random effects (specify
## interval input argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - proporsi_desa_sanitasi_layak  1 1192.2
## - Proporsi_KK_Bekerja           1 1192.2
## - Sex_Ratio                     1 1192.7
## - proporsi_tenaga_medis         1 1192.8
## <none>                            1193.9
## + Dependency_Ratio              1 1195.8
## + proporsi_desa_jalan_darat     1 1195.9
## - proporsi_desa_pertanian       1 1207.7
## - proporsi_keluarga_blt         1 1209.6
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 1192.16
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Proporsi_KK_Bekerja + Sex_Ratio + proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_keluarga_blt +
## Proporsi_KK_Bekerja + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## Proporsi_KK_Bekerja + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - Sex_Ratio                     1 1190.9
## - proporsi_tenaga_medis         1 1191.0
## - Proporsi_KK_Bekerja           1 1191.3
## <none>                            1192.2
## + proporsi_desa_sanitasi_layak  1 1193.9
## + Dependency_Ratio              1 1194.1
## + proporsi_desa_jalan_darat     1 1194.2
## - proporsi_desa_pertanian       1 1207.4
## - proporsi_keluarga_blt         1 1207.9
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 1190.91
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Proporsi_KK_Bekerja + proporsi_tenaga_medis
## Warning in fh(fixed = direct_estimate ~ proporsi_keluarga_blt +
## Proporsi_KK_Bekerja + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## Proporsi_KK_Bekerja + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - proporsi_tenaga_medis         1 1189.8
## - Proporsi_KK_Bekerja           1 1190.7
## <none>                            1190.9
## + Sex_Ratio                     1 1192.2
## + proporsi_desa_sanitasi_layak  1 1192.7
## + proporsi_desa_jalan_darat     1 1192.9
## + Dependency_Ratio              1 1192.9
## - proporsi_keluarga_blt         1 1206.0
## - proporsi_desa_pertanian       1 1207.0
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 1189.76
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Proporsi_KK_Bekerja
## Warning in fh(fixed = direct_estimate ~ proporsi_keluarga_blt +
## Proporsi_KK_Bekerja, : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## Proporsi_KK_Bekerja, : The estimate of the variance of the random effects falls
## at the interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt, : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## - Proporsi_KK_Bekerja           1 1188.8
## <none>                            1189.8
## + proporsi_tenaga_medis         1 1190.9
## + Sex_Ratio                     1 1191.0
## + proporsi_desa_sanitasi_layak  1 1191.6
## + Dependency_Ratio              1 1191.7
## + proporsi_desa_jalan_darat     1 1191.8
## - proporsi_keluarga_blt         1 1204.3
## - proporsi_desa_pertanian       1 1208.1
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt, : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## 
## Step: AIC = 1188.83
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt
## Warning in fh(fixed = direct_estimate ~ proporsi_keluarga_blt, vardir =
## "direct_var", : The estimate of the variance of the random effects falls at the
## interval limit. It is recommended to choose a larger interval for the
## estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt + : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
##                                df    AIC
## <none>                            1188.8
## + Sex_Ratio                     1 1189.5
## + Proporsi_KK_Bekerja           1 1189.8
## + proporsi_desa_sanitasi_layak  1 1190.0
## + Dependency_Ratio              1 1190.6
## + proporsi_desa_jalan_darat     1 1190.6
## + proporsi_tenaga_medis         1 1190.7
## - proporsi_keluarga_blt         1 1206.0
## - proporsi_desa_pertanian       1 1206.2
## Warning in fh(fixed = direct_estimate ~ proporsi_desa_pertanian +
## proporsi_keluarga_blt, : The estimate of the variance of the random effects
## falls at the interval limit. It is recommended to choose a larger interval for
## the estimation of the variance of the random effects (specify interval input
## argument).
plot(fh_sarbagita_nolog, type = "est", main = "Estimasi Fay-Herriot Sarbagita (51) - NOLOG")

## Press [enter] to continue

## Press [enter] to continue

plot(fh_banjarbakula_nolog, type = "est", main = "Estimasi Fay-Herriot Banjarbakula (63) - NOLOG")

## Press [enter] to continue

## Press [enter] to continue

run_sae_workflow <- function(aux_data, direct_data, vars_awal, prefix_file) {
  aux_sub <- aux_data[substr(aux_data$id_kec, 1, 2) == prefix_file, ]
  direct_sub <- direct_data[substr(direct_data$id_kec, 1, 2) == prefix_file, ]
  candidate_vars_sub <- left_join(aux_sub, direct_sub, by = "id_kec", suffix = c("_aux", "_direct"))
  vars_final <- remove_highly_correlated(candidate_vars_sub, vars_awal)
  combined_data_sub <- emdi::combine_data(
    pop_data = aux_sub, pop_domains = "id_kec",
    smp_data = direct_sub, smp_domains = "id_kec"
  )
  assign("combined_data_sub", combined_data_sub, envir = .GlobalEnv)
  fh_model <- fh(
    fixed = as.formula(paste("direct_estimate ~", paste(vars_final, collapse = " + ")) ),
    vardir = "direct_var",
    combined_data = combined_data_sub,
    domains = "id_kec",
    MSE = TRUE,
    transformation = "log",
    backtransformation = "bc_crude",
    method = "ml",
    interval = c(0, 1e20)
  )
  step_fh <- emdi::step(fh_model, criterion = "AICc", direction = "both")
  return(step_fh)
}

fh_sarbagita <- run_sae_workflow(aux_data, direct_data, vars_sarbagita, "51")
## Start: AIC = 52.04
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Dependency_Ratio + Proporsi_KK_Bekerja + Sex_Ratio + proporsi_tenaga_medis
##                           df    AIC
## - Dependency_Ratio         1 50.100
## - proporsi_tenaga_medis    1 50.149
## - proporsi_keluarga_blt    1 50.256
## - Proporsi_KK_Bekerja      1 50.265
## <none>                       52.042
## - Sex_Ratio                1 54.731
## - proporsi_desa_pertanian  1 56.438
## 
## Step: AIC = 50.1
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Proporsi_KK_Bekerja + Sex_Ratio + proporsi_tenaga_medis
##                           df    AIC
## - proporsi_tenaga_medis    1 48.163
## - proporsi_keluarga_blt    1 48.267
## - Proporsi_KK_Bekerja      1 48.317
## <none>                       50.100
## + Dependency_Ratio         1 52.042
## - Sex_Ratio                1 52.767
## - proporsi_desa_pertanian  1 54.447
## 
## Step: AIC = 48.16
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Proporsi_KK_Bekerja + Sex_Ratio
##                           df    AIC
## - proporsi_keluarga_blt    1 46.357
## - Proporsi_KK_Bekerja      1 46.450
## <none>                       48.163
## + proporsi_tenaga_medis    1 50.100
## + Dependency_Ratio         1 50.149
## - Sex_Ratio                1 51.027
## - proporsi_desa_pertanian  1 52.590
## 
## Step: AIC = 46.36
## direct_estimate ~ proporsi_desa_pertanian + Proporsi_KK_Bekerja + 
##     Sex_Ratio
##                           df    AIC
## - Proporsi_KK_Bekerja      1 44.601
## <none>                       46.357
## + proporsi_keluarga_blt    1 48.163
## + proporsi_tenaga_medis    1 48.267
## + Dependency_Ratio         1 48.356
## - Sex_Ratio                1 49.071
## - proporsi_desa_pertanian  1 59.055
## 
## Step: AIC = 44.6
## direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio
##                           df    AIC
## <none>                       44.601
## + Proporsi_KK_Bekerja      1 46.357
## + proporsi_tenaga_medis    1 46.442
## + proporsi_keluarga_blt    1 46.450
## + Dependency_Ratio         1 46.596
## - Sex_Ratio                1 47.071
## - proporsi_desa_pertanian  1 57.068
fh_banjarbakula <- run_sae_workflow(aux_data, direct_data, vars_banjarbakula, "63")
## Start: AIC = 101.78
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + proporsi_desa_jalan_darat + 
##     Dependency_Ratio + Proporsi_KK_Bekerja + Sex_Ratio + proporsi_tenaga_medis
##                                df     AIC
## - proporsi_tenaga_medis         1  99.975
## - Dependency_Ratio              1  99.977
## - proporsi_desa_sanitasi_layak  1 100.042
## - proporsi_desa_jalan_darat     1 100.053
## - Proporsi_KK_Bekerja           1 100.083
## - Sex_Ratio                     1 100.729
## <none>                            101.776
## - proporsi_desa_pertanian       1 103.549
## - proporsi_keluarga_blt         1 109.399
## 
## Step: AIC = 99.98
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + proporsi_desa_jalan_darat + 
##     Dependency_Ratio + Proporsi_KK_Bekerja + Sex_Ratio
##                                df     AIC
## - Proporsi_KK_Bekerja           1  98.201
## - Dependency_Ratio              1  98.239
## - proporsi_desa_jalan_darat     1  98.276
## - proporsi_desa_sanitasi_layak  1  98.308
## - Sex_Ratio                     1  99.018
## <none>                             99.975
## - proporsi_desa_pertanian       1 101.641
## + proporsi_tenaga_medis         1 101.776
## - proporsi_keluarga_blt         1 107.958
## 
## Step: AIC = 98.2
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + proporsi_desa_jalan_darat + 
##     Dependency_Ratio + Sex_Ratio
##                                df     AIC
## - Dependency_Ratio              1  96.458
## - proporsi_desa_sanitasi_layak  1  96.604
## - proporsi_desa_jalan_darat     1  96.605
## - Sex_Ratio                     1  97.530
## <none>                             98.201
## - proporsi_desa_pertanian       1  99.659
## + Proporsi_KK_Bekerja           1  99.975
## + proporsi_tenaga_medis         1 100.083
## - proporsi_keluarga_blt         1 106.786
## 
## Step: AIC = 96.46
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + proporsi_desa_jalan_darat + 
##     Sex_Ratio
##                                df     AIC
## - proporsi_desa_jalan_darat     1  95.010
## - proporsi_desa_sanitasi_layak  1  95.022
## - Sex_Ratio                     1  95.738
## <none>                             96.458
## - proporsi_desa_pertanian       1  97.739
## + Dependency_Ratio              1  98.201
## + Proporsi_KK_Bekerja           1  98.239
## + proporsi_tenaga_medis         1  98.292
## - proporsi_keluarga_blt         1 107.389
## 
## Step: AIC = 95.01
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     proporsi_desa_sanitasi_layak + Sex_Ratio
##                                df     AIC
## - proporsi_desa_sanitasi_layak  1  93.173
## - Sex_Ratio                     1  94.256
## <none>                             95.010
## - proporsi_desa_pertanian       1  95.760
## + proporsi_desa_jalan_darat     1  96.458
## + Dependency_Ratio              1  96.605
## + Proporsi_KK_Bekerja           1  96.665
## + proporsi_tenaga_medis         1  96.818
## - proporsi_keluarga_blt         1 105.813
## 
## Step: AIC = 93.17
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt + 
##     Sex_Ratio
##                                df     AIC
## - Sex_Ratio                     1  92.367
## <none>                             93.173
## - proporsi_desa_pertanian       1  94.341
## + Dependency_Ratio              1  94.713
## + Proporsi_KK_Bekerja           1  94.817
## + proporsi_tenaga_medis         1  94.936
## + proporsi_desa_sanitasi_layak  1  95.010
## + proporsi_desa_jalan_darat     1  95.022
## - proporsi_keluarga_blt         1 103.914
## 
## Step: AIC = 92.37
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt
##                                df     AIC
## <none>                             92.367
## + Sex_Ratio                     1  93.173
## + Proporsi_KK_Bekerja           1  93.704
## + Dependency_Ratio              1  93.983
## + proporsi_tenaga_medis         1  94.098
## - proporsi_desa_pertanian       1  94.170
## + proporsi_desa_jalan_darat     1  94.199
## + proporsi_desa_sanitasi_layak  1  94.256
## - proporsi_keluarga_blt         1 101.926

6. Interpretasi dan Visualisasi Hasil

Hasil model dievaluasi dengan melihat koefisien, summary, dan plot estimasi.

cat("Model Sarbagita (51):\n")
## Model Sarbagita (51):
print(fh_sarbagita$fixed)
## direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio
## <environment: 0x000002271f489c40>
cat("\nModel Banjarbakula (63):\n")
## 
## Model Banjarbakula (63):
print(fh_banjarbakula$fixed)
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt
## <environment: 0x00000227208b0628>
summary(fh_sarbagita)
## Call:
##  fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio, 
##     vardir = "direct_var", combined_data = combined_data_sub, 
##     domains = "id_kec", method = "ml", interval = c(0, 1e+20), 
##     transformation = "log", backtransformation = "bc_crude", 
##     MSE = TRUE)
## 
## Out-of-sample domains:  0 
## In-sample domains:  27 
## 
## Variance and MSE estimation:
## Variance estimation method:  ml 
## Estimated variance component(s):  0.09654118 
## MSE method:  datta-lahiri 
## 
## Coefficients:
##                         coefficients std.error t.value   p.value    
## (Intercept)                  5.57149   2.76569  2.0145   0.04396 *  
## proporsi_desa_pertanian     -1.24056   0.29306 -4.2332 2.304e-05 ***
## Sex_Ratio                    5.70075   2.69281  2.1170   0.03426 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Explanatory measures:
##     loglike      AIC      BIC     AdjR2     FH_R2
## 1 -18.30059 44.60118 49.78453 0.6923171 0.8262885
## 
## Residual diagnostics:
##                          Skewness Kurtosis Shapiro_W Shapiro_p
## Standardized_Residuals -0.1424257 2.215373 0.9661113 0.5030593
## Random_effects          0.4541723 3.150265 0.9778106 0.8099037
## 
## Transformation:
##  Transformation Back_transformation
##             log            bc_crude
summary(fh_banjarbakula)
## Call:
##  fh(fixed = direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt, 
##     vardir = "direct_var", combined_data = combined_data_sub, 
##     domains = "id_kec", method = "ml", interval = c(0, 1e+20), 
##     transformation = "log", backtransformation = "bc_crude", 
##     MSE = TRUE)
## 
## Out-of-sample domains:  0 
## In-sample domains:  57 
## 
## Variance and MSE estimation:
## Variance estimation method:  ml 
## Estimated variance component(s):  0.06755103 
## MSE method:  datta-lahiri 
## 
## Coefficients:
##                         coefficients std.error t.value   p.value    
## (Intercept)                 10.86445   0.11032 98.4825 < 2.2e-16 ***
## proporsi_desa_pertanian     -0.56306   0.28635 -1.9663 0.0492592 *  
## proporsi_keluarga_blt      -14.46784   4.15896 -3.4787 0.0005038 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Explanatory measures:
##    loglike      AIC      BIC     AdjR2     FH_R2
## 1 -42.1836 92.36719 100.5394 0.6898007 0.9544093
## 
## Residual diagnostics:
##                            Skewness Kurtosis Shapiro_W   Shapiro_p
## Standardized_Residuals  0.048942376 2.459219 0.9828289 0.593383642
## Random_effects         -0.003380044 4.518248 0.9430438 0.009626818
## 
## Transformation:
##  Transformation Back_transformation
##             log            bc_crude
plot(fh_sarbagita, type = "est", main = "Estimasi Fay-Herriot Sarbagita (51)")

## Press [enter] to continue

## Press [enter] to continue

plot(fh_banjarbakula, type = "est", main = "Estimasi Fay-Herriot Banjarbakula (63)")

## Press [enter] to continue

## Press [enter] to continue

compare_plot(fh_sarbagita, CV = TRUE, label = "no_title")

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

compare_plot(fh_banjarbakula, CV = TRUE, label = "no_title")

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

cat("Model Sarbagita (51) Tanpa Log:\n")
## Model Sarbagita (51) Tanpa Log:
print(fh_sarbagita_nolog$fixed)
## direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio
## <environment: 0x000002272120c970>
cat("\nModel Banjarbakula (63) Tanpa Log:\n")
## 
## Model Banjarbakula (63) Tanpa Log:
print(fh_banjarbakula_nolog$fixed)
## direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt
## <environment: 0x00000226c9e339d8>
summary(fh_sarbagita_nolog)
## Call:
##  fh(fixed = direct_estimate ~ proporsi_desa_pertanian + Sex_Ratio, 
##     vardir = "direct_var", combined_data = combined_data_sub, 
##     domains = "id_kec", method = "ml", interval = c(0, 1e+20), 
##     transformation = "no", backtransformation = NULL, MSE = TRUE)
## 
## Out-of-sample domains:  0 
## In-sample domains:  27 
## 
## Variance and MSE estimation:
## Variance estimation method:  ml 
## Estimated variance component(s):  5.186947e-05 
## MSE method:  datta-lahiri 
## 
## Coefficients:
##                         coefficients std.error t.value   p.value    
## (Intercept)                  -8517.3   21347.4 -0.3990 0.6899044    
## proporsi_desa_pertanian     -48860.3    7380.7 -6.6200 3.593e-11 ***
## Sex_Ratio                    74782.3   19317.9  3.8711 0.0001083 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Explanatory measures:
##     loglike      AIC      BIC      KIC     AdjR2     FH_R2
## 1 -303.9887 615.9775 621.1608 619.9775 0.5038567 0.6841457
## 
## Residual diagnostics:
##                          Skewness Kurtosis Shapiro_W  Shapiro_p
## Standardized_Residuals -0.2993557 2.485258 0.9759357 0.76143313
## Random_effects         -0.7715159 2.798955 0.9277177 0.06076792
## 
## Transformation: No transformation
summary(fh_banjarbakula_nolog)
## Call:
##  fh(fixed = direct_estimate ~ proporsi_desa_pertanian + proporsi_keluarga_blt, 
##     vardir = "direct_var", combined_data = combined_data_sub, 
##     domains = "id_kec", method = "ml", interval = c(0, 1e+20), 
##     transformation = "no", backtransformation = NULL, MSE = TRUE)
## 
## Out-of-sample domains:  0 
## In-sample domains:  57 
## 
## Variance and MSE estimation:
## Variance estimation method:  ml 
## Estimated variance component(s):  5.186947e-05 
## MSE method:  datta-lahiri 
## 
## Coefficients:
##                         coefficients std.error t.value   p.value    
## (Intercept)                  38082.4    2919.8 13.0428 < 2.2e-16 ***
## proporsi_desa_pertanian     -18144.2    4122.0 -4.4018 1.074e-05 ***
## proporsi_keluarga_blt      -167786.7   32855.0 -5.1069 3.275e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Explanatory measures:
##     loglike      AIC      BIC      KIC     AdjR2     FH_R2
## 1 -590.4162 1188.832 1197.005 1192.832 0.6421601 0.8017479
## 
## Residual diagnostics:
##                           Skewness Kurtosis Shapiro_W    Shapiro_p
## Standardized_Residuals -0.02394201 3.486241 0.9827468 5.894034e-01
## Random_effects         -1.50793805 6.315344 0.8895060 8.336775e-05
## 
## Transformation: No transformation
plot(fh_sarbagita_nolog, type = "est", main = "Estimasi Fay-Herriot Sarbagita (51) Tanpa Log")

## Press [enter] to continue

## Press [enter] to continue

plot(fh_banjarbakula_nolog, type = "est", main = "Estimasi Fay-Herriot Banjarbakula (63) Tanpa Log")

## Press [enter] to continue

## Press [enter] to continue

compare_plot(fh_sarbagita_nolog, CV = TRUE, label = "no_title")

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

compare_plot(fh_banjarbakula_nolog, CV = TRUE, label = "no_title")

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

7. Pemodelan Alternatif: REBLUP

Sebagai alternatif, digunakan metode REBLUP (Robust EBLUP) untuk kedua wilayah.

# Sarbagita (51)
aux_sub_sarbagita <- aux_data[substr(aux_data$id_kec, 1, 2) == "51", ]
direct_sub_sarbagita <- direct_data[substr(direct_data$id_kec, 1, 2) == "51", ]
combined_data_sub_sarbagita <- emdi::combine_data(
  pop_data = aux_sub_sarbagita, pop_domains = "id_kec",
  smp_data = direct_sub_sarbagita, smp_domains = "id_kec"
)
assign("combined_data_sub", combined_data_sub, envir = .GlobalEnv)
reblup_sarbagita <- fh(
  fixed = fh_sarbagita$fixed,
  vardir = "direct_var",
  combined_data = combined_data_sub_sarbagita,
  domains = "id_kec",
  MSE = TRUE,
  method = "reblup",
  mse_type = "pseudo"
)
summary(reblup_sarbagita)
## Call:
##  fh(fixed = fh_sarbagita$fixed, vardir = "direct_var", combined_data = combined_data_sub_sarbagita, 
##     domains = "id_kec", method = "reblup", MSE = TRUE, mse_type = "pseudo")
## 
## Out-of-sample domains:  0 
## In-sample domains:  27 
## 
## Variance and MSE estimation:
## Variance estimation method: robustified ml, reblup 
## k =  1.345 
## Estimated variance component(s):  1.020025e-05 
## MSE method:  pseudo linearization 
## 
## Coefficients:
##                         coefficients std.error t.value   p.value    
## (Intercept)                  -2261.4   21347.4 -0.1059 0.9156347    
## proporsi_desa_pertanian     -52463.0    7380.7 -7.1081 1.177e-12 ***
## Sex_Ratio                    71455.7   19317.9  3.6989 0.0002165 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Explanatory measures:
## No explanatory measures provided 
## 
## Residual diagnostics:
##                          Skewness Kurtosis Shapiro_W Shapiro_p
## Standardized_Residuals -0.4300216 2.698447 0.9716168 0.6449933
## Random_effects         -0.5994367 2.464722 0.9445488 0.1577259
## 
## Transformation: No transformation
plot(reblup_sarbagita, type = "est", main = "Estimasi REBLUP Sarbagita (51)")

## Press [enter] to continue

## Press [enter] to continue

compare_plot(reblup_sarbagita, CV = TRUE, label = "no_title")

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

# Banjarbakula (63)
aux_sub_banjarbakula <- aux_data[substr(aux_data$id_kec, 1, 2) == "63", ]
direct_sub_banjarbakula <- direct_data[substr(direct_data$id_kec, 1, 2) == "63", ]
combined_data_sub_banjarbakula <- emdi::combine_data(
  pop_data = aux_sub_banjarbakula, pop_domains = "id_kec",
  smp_data = direct_sub_banjarbakula, smp_domains = "id_kec"
)
assign("combined_data_sub", combined_data_sub, envir = .GlobalEnv)
reblup_banjarbakula <- fh(
  fixed = fh_banjarbakula$fixed,
  vardir = "direct_var",
  combined_data = combined_data_sub_banjarbakula,
  domains = "id_kec",
  MSE = TRUE,
  method = "reblup",
  mse_type = "pseudo"
)
summary(reblup_banjarbakula)
## Call:
##  fh(fixed = fh_banjarbakula$fixed, vardir = "direct_var", combined_data = combined_data_sub_banjarbakula, 
##     domains = "id_kec", method = "reblup", MSE = TRUE, mse_type = "pseudo")
## 
## Out-of-sample domains:  0 
## In-sample domains:  57 
## 
## Variance and MSE estimation:
## Variance estimation method: robustified ml, reblup 
## k =  1.345 
## Estimated variance component(s):  0.002018499 
## MSE method:  pseudo linearization 
## 
## Coefficients:
##                         coefficients std.error t.value   p.value    
## (Intercept)                  38963.7    2919.8 13.3446 < 2.2e-16 ***
## proporsi_desa_pertanian     -19396.1    4122.0 -4.7055 2.532e-06 ***
## proporsi_keluarga_blt      -164177.8   32855.0 -4.9970 5.822e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Explanatory measures:
## No explanatory measures provided 
## 
## Residual diagnostics:
##                          Skewness Kurtosis Shapiro_W    Shapiro_p
## Standardized_Residuals -0.1339401 3.666444 0.9805110 4.856938e-01
## Random_effects         -1.6611927 7.702495 0.8768882 3.206112e-05
## 
## Transformation: No transformation
plot(reblup_banjarbakula, type = "est", main = "Estimasi REBLUP Banjarbakula (63)")

## Press [enter] to continue

## Press [enter] to continue

compare_plot(reblup_banjarbakula, CV = TRUE, label = "no_title")

## Press [enter] to continue

## Press [enter] to continue

## Press [enter] to continue

8. Deskriptif Jumlah Kecamatan Berdasarkan RSE

Analisis jumlah kecamatan berdasarkan rentang Relative Standard Error (RSE) untuk setiap metode estimasi.

get_rse_summary <- function(cv_values) {
  c(
    `<25` = sum(cv_values < 25, na.rm = TRUE),
    `25-<50` = sum(cv_values >= 25 & cv_values < 50, na.rm = TRUE),
    `>=50` = sum(cv_values >= 50, na.rm = TRUE)
  )
}

# Sarbagita (51)
direct_rse_sarbagita <- 100 * sqrt(fh_sarbagita$MSE$Direct) / fh_sarbagita$ind$Direct
eblup_rse_sarbagita  <- 100 * sqrt(fh_sarbagita$MSE$FH) / fh_sarbagita$ind$FH
reblup_rse_sarbagita <- 100 * sqrt(reblup_sarbagita$MSE$FH) / reblup_sarbagita$ind$FH
eblup_nolog_rse_sarbagita <- 100 * sqrt(fh_sarbagita_nolog$MSE$FH) / fh_sarbagita_nolog$ind$FH

summary_sarbagita <- data.frame(
  Estimasi = c("Direct Estimate", "EBLUP LOG", "EBLUP NOLOG", "REBLUP"),
  `<25` = c(get_rse_summary(direct_rse_sarbagita)[1], get_rse_summary(eblup_rse_sarbagita)[1], get_rse_summary(eblup_nolog_rse_sarbagita)[1], get_rse_summary(reblup_rse_sarbagita)[1]),
  `25-<50` = c(get_rse_summary(direct_rse_sarbagita)[2], get_rse_summary(eblup_rse_sarbagita)[2], get_rse_summary(eblup_nolog_rse_sarbagita)[2], get_rse_summary(reblup_rse_sarbagita)[2]),
  `>=50` = c(get_rse_summary(direct_rse_sarbagita)[3], get_rse_summary(eblup_rse_sarbagita)[3], get_rse_summary(eblup_nolog_rse_sarbagita)[3], get_rse_summary(reblup_rse_sarbagita)[3])
)
print(summary_sarbagita)
##          Estimasi X.25 X25..50 X..50
## 1 Direct Estimate    4      19     4
## 2       EBLUP LOG    8      18     1
## 3     EBLUP NOLOG   26       0     1
## 4          REBLUP   26       0     1
# Banjarbakula (63)
direct_rse_banjarbakula <- 100 * sqrt(fh_banjarbakula$MSE$Direct) / fh_banjarbakula$ind$Direct
eblup_rse_banjarbakula  <- 100 * sqrt(fh_banjarbakula$MSE$FH) / fh_banjarbakula$ind$FH
reblup_rse_banjarbakula <- 100 * sqrt(reblup_banjarbakula$MSE$FH) / reblup_banjarbakula$ind$FH
eblup_nolog_rse_banjarbakula <- 100 * sqrt(fh_banjarbakula_nolog$MSE$FH) / fh_banjarbakula_nolog$ind$FH

summary_banjarbakula <- data.frame(
  Estimasi = c("Direct Estimate", "EBLUP LOG", "EBLUP NOLOG", "REBLUP"),
  `<25` = c(get_rse_summary(direct_rse_banjarbakula)[1], get_rse_summary(eblup_rse_banjarbakula)[1], get_rse_summary(eblup_nolog_rse_banjarbakula)[1], get_rse_summary(reblup_rse_banjarbakula)[1]),
  `25-<50` = c(get_rse_summary(direct_rse_banjarbakula)[2], get_rse_summary(eblup_rse_banjarbakula)[2], get_rse_summary(eblup_nolog_rse_banjarbakula)[2], get_rse_summary(reblup_rse_banjarbakula)[2]),
  `>=50` = c(get_rse_summary(direct_rse_banjarbakula)[3], get_rse_summary(eblup_rse_banjarbakula)[3], get_rse_summary(eblup_nolog_rse_banjarbakula)[3], get_rse_summary(reblup_rse_banjarbakula)[3])
)
print(summary_banjarbakula)
##          Estimasi X.25 X25..50 X..50
## 1 Direct Estimate    6      21    30
## 2       EBLUP LOG   11      46     0
## 3     EBLUP NOLOG   50       3     4
## 4          REBLUP   53       2     2

Penutup

Dokumen ini dapat digunakan sebagai referensi untuk analisis SAE berbasis Fay-Herriot pada data dan wilayah lain dengan struktur serupa. Setiap langkah telah dijelaskan dan disertai chunk kode yang dapat dijalankan langsung.