1 Pendahuluan

Penelitian ini menggunakan simulasi Monte Carlo untuk mengevaluasi kinerja estimasi parameter regresi logistik biner pada berbagai kombinasi ukuran sampel, proporsi missing value, tingkat multikolinearitas, dan metode penanganan missing value.

Metode yang dibandingkan adalah:

  1. Listwise Deletion
  2. Expectation-Maximization (EM) menggunakan package Amelia

Evaluasi dilakukan menggunakan Mean Estimate, Bias, RMSE, dan distribusi error estimasi parameter terhadap parameter sebenarnya.

2 Setup

3 Paket

library(MASS)
library(Amelia)
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)

4 Seed

set.seed(42)

5 Parameter Sebenarnya

beta_true <- c(beta0 = -1.5,
               beta1 =  2.0,
               beta2 = -1.0,
               beta3 =  0.5)

beta_true
## beta0 beta1 beta2 beta3 
##  -1.5   2.0  -1.0   0.5

6 Fungsi Pembangkitan Data

generate_data <- function(n, rho){

  Sigma <- matrix(c(1,rho,0,
                    rho,1,0,
                    0,0,1),
                  nrow = 3, byrow = TRUE)

  X <- MASS::mvrnorm(n, mu = c(0,0,0), Sigma = Sigma)

  x1 <- X[,1]
  x2 <- X[,2]
  x3 <- X[,3]
  
  eta <- beta_true["beta0"] +
         beta_true["beta1"]*x1 +
         beta_true["beta2"]*x2 +
         beta_true["beta3"]*x3

  data.frame(
    y = rbinom(n, 1, plogis(eta)),
    x1, x2, x3
  )
}

7 Fungsi Missing Value

Missing value hanya diberikan pada variabel prediktor x1 dan x2.

create_missing <- function(df, prop){

  if(prop == 0) return(df)

  n <- nrow(df)

  miss_x1 <- sample(n, round(prop*n))
  miss_x2 <- sample(n, round(prop*n))

  df$x1[miss_x1] <- NA
  df$x2[miss_x2] <- NA

  df
}

8 Estimasi Listwise Deletion

estimate_listwise <- function(df){

  tryCatch({

    df <- na.omit(df)

    if(nrow(df) < 20)
      return(setNames(rep(NA,4),
             c("beta0","beta1","beta2","beta3")))

    fit <- glm(y ~ x1 + x2 + x3,
               family = binomial(),
               data = df,
               control = glm.control(maxit = 100))

    est <- coef(fit)

    setNames(
      c(
        unname(est["(Intercept)"]),
        unname(est["x1"]),
        unname(est["x2"]),
        unname(est["x3"])
      ),
      c("beta0","beta1","beta2","beta3")
    )

  }, error = function(e)
     setNames(rep(NA,4),
              c("beta0","beta1","beta2","beta3")))
}

9 Estimasi Expectation-Maximization

Menggunakan package Amelia dengan m = 1. Pada kondisi tanpa missing value, data langsung digunakan tanpa proses imputasi.

estimate_em <- function(df){

  tryCatch({

    if(sum(is.na(df)) == 0){

      df_imp <- df

    } else {

      imp <- amelia(df, m = 1, noms = "y", p2s = 0)
      df_imp <- imp$imputations[[1]]

    }

    fit <- glm(y ~ x1 + x2 + x3,
               family = binomial(),
               data = df_imp,
               control = glm.control(maxit = 100))

    est <- coef(fit)

    setNames(
      c(
        unname(est["(Intercept)"]),
        unname(est["x1"]),
        unname(est["x2"]),
        unname(est["x3"])
      ),
      c("beta0","beta1","beta2","beta3")
    )

  }, error = function(e)
     setNames(rep(NA,4),
              c("beta0","beta1","beta2","beta3")))
}

10 Desain Faktorial Simulasi

design <- expand.grid(
  n = c(50, 200, 1000),
  missing = c(0, 0.10, 0.20),
  rho = c(0, 0.95),
  method = c("Listwise Deletion",
             "Expectation-Maximization")
)

design$scenario <- paste0(
  "N", design$n,
  "_M", design$missing*100,
  "_R", ifelse(design$rho == 0, "0", "95")
)

kable(design,
      caption = "Desain Faktorial Simulasi")
Desain Faktorial Simulasi
n missing rho method scenario
50 0.0 0.00 Listwise Deletion N50_M0_R0
200 0.0 0.00 Listwise Deletion N200_M0_R0
1000 0.0 0.00 Listwise Deletion N1000_M0_R0
50 0.1 0.00 Listwise Deletion N50_M10_R0
200 0.1 0.00 Listwise Deletion N200_M10_R0
1000 0.1 0.00 Listwise Deletion N1000_M10_R0
50 0.2 0.00 Listwise Deletion N50_M20_R0
200 0.2 0.00 Listwise Deletion N200_M20_R0
1000 0.2 0.00 Listwise Deletion N1000_M20_R0
50 0.0 0.95 Listwise Deletion N50_M0_R95
200 0.0 0.95 Listwise Deletion N200_M0_R95
1000 0.0 0.95 Listwise Deletion N1000_M0_R95
50 0.1 0.95 Listwise Deletion N50_M10_R95
200 0.1 0.95 Listwise Deletion N200_M10_R95
1000 0.1 0.95 Listwise Deletion N1000_M10_R95
50 0.2 0.95 Listwise Deletion N50_M20_R95
200 0.2 0.95 Listwise Deletion N200_M20_R95
1000 0.2 0.95 Listwise Deletion N1000_M20_R95
50 0.0 0.00 Expectation-Maximization N50_M0_R0
200 0.0 0.00 Expectation-Maximization N200_M0_R0
1000 0.0 0.00 Expectation-Maximization N1000_M0_R0
50 0.1 0.00 Expectation-Maximization N50_M10_R0
200 0.1 0.00 Expectation-Maximization N200_M10_R0
1000 0.1 0.00 Expectation-Maximization N1000_M10_R0
50 0.2 0.00 Expectation-Maximization N50_M20_R0
200 0.2 0.00 Expectation-Maximization N200_M20_R0
1000 0.2 0.00 Expectation-Maximization N1000_M20_R0
50 0.0 0.95 Expectation-Maximization N50_M0_R95
200 0.0 0.95 Expectation-Maximization N200_M0_R95
1000 0.0 0.95 Expectation-Maximization N1000_M0_R95
50 0.1 0.95 Expectation-Maximization N50_M10_R95
200 0.1 0.95 Expectation-Maximization N200_M10_R95
1000 0.1 0.95 Expectation-Maximization N1000_M10_R95
50 0.2 0.95 Expectation-Maximization N50_M20_R95
200 0.2 0.95 Expectation-Maximization N200_M20_R95
1000 0.2 0.95 Expectation-Maximization N1000_M20_R95
write.csv(
  design,
  "tables/tab1_design.csv",
  row.names = FALSE
)

Jumlah skenario:

nrow(design)
## [1] 36

11 Parameter Monte Carlo

n_rep <- 100

12 Inisialisasi Penyimpanan Hasil

results_list <- vector("list",
                        nrow(design) * n_rep)

counter <- 1

13 Simulasi Monte Carlo

for(s in seq_len(nrow(design))){

  sc <- design[s,]

  cat("Skenario", s, "dari",
      nrow(design), "\n")

  for(r in seq_len(n_rep)){

    df <- generate_data(sc$n, sc$rho)
    df <- create_missing(df, sc$missing)

    est <- if(sc$method ==
              "Listwise Deletion")
             estimate_listwise(df)
           else
             estimate_em(df)

    results_list[[counter]] <- data.frame(
      scenario = sc$scenario,
      n = sc$n,
      missing = sc$missing,
      rho = sc$rho,
      method = sc$method,
      replication = r,
      beta0hat = est["beta0"],
      beta1hat = est["beta1"],
      beta2hat = est["beta2"],
      beta3hat = est["beta3"]
    )

    counter <- counter + 1
  }
}
## Skenario 1 dari 36
## Skenario 2 dari 36 
## Skenario 3 dari 36 
## Skenario 4 dari 36
## Skenario 5 dari 36 
## Skenario 6 dari 36 
## Skenario 7 dari 36
## Skenario 8 dari 36 
## Skenario 9 dari 36 
## Skenario 10 dari 36 
## Skenario 11 dari 36 
## Skenario 12 dari 36 
## Skenario 13 dari 36 
## Skenario 14 dari 36 
## Skenario 15 dari 36 
## Skenario 16 dari 36
## Skenario 17 dari 36 
## Skenario 18 dari 36 
## Skenario 19 dari 36
## Skenario 20 dari 36 
## Skenario 21 dari 36 
## Skenario 22 dari 36
## Skenario 23 dari 36 
## Skenario 24 dari 36 
## Skenario 25 dari 36
## Skenario 26 dari 36 
## Skenario 27 dari 36 
## Skenario 28 dari 36 
## Skenario 29 dari 36 
## Skenario 30 dari 36 
## Skenario 31 dari 36 
## Skenario 32 dari 36 
## Skenario 33 dari 36 
## Skenario 34 dari 36 
## Skenario 35 dari 36 
## Skenario 36 dari 36

14 Menggabungkan Hasil Simulasi

all_results <- bind_rows(results_list)

15 Pemeriksaan Awal

dim(all_results)
## [1] 3600   10
head(all_results)
##            scenario  n missing rho            method replication   beta0hat
## beta0...1 N50_M0_R0 50       0   0 Listwise Deletion           1 -0.4165615
## beta0...2 N50_M0_R0 50       0   0 Listwise Deletion           2 -1.3901099
## beta0...3 N50_M0_R0 50       0   0 Listwise Deletion           3 -1.3542189
## beta0...4 N50_M0_R0 50       0   0 Listwise Deletion           4 -3.3113345
## beta0...5 N50_M0_R0 50       0   0 Listwise Deletion           5 -1.9842905
## beta0...6 N50_M0_R0 50       0   0 Listwise Deletion           6 -3.6186759
##           beta1hat   beta2hat  beta3hat
## beta0...1 8.749971 -2.0760010 0.2518683
## beta0...2 2.916689 -2.3362033 1.2138683
## beta0...3 2.564326 -0.4374358 0.8137437
## beta0...4 2.591129 -2.0547731 0.2150311
## beta0...5 2.854009 -1.5840223 1.2998989
## beta0...6 2.608669 -1.0873121 1.6483927
summary(all_results)
##    scenario               n             missing         rho       
##  Length:3600        Min.   :  50.0   Min.   :0.0   Min.   :0.000  
##  Class :character   1st Qu.:  50.0   1st Qu.:0.0   1st Qu.:0.000  
##  Mode  :character   Median : 200.0   Median :0.1   Median :0.475  
##                     Mean   : 416.7   Mean   :0.1   Mean   :0.475  
##                     3rd Qu.:1000.0   3rd Qu.:0.2   3rd Qu.:0.950  
##                     Max.   :1000.0   Max.   :0.2   Max.   :0.950  
##                       method      replication        beta0hat               
##  Listwise Deletion       :1800   Min.   :  1.00   Min.   :-892165639925000  
##  Expectation-Maximization:1800   1st Qu.: 25.75   1st Qu.:              -2  
##                                  Median : 50.50   Median :              -2  
##                                  Mean   : 50.50   Mean   :   -247823788871  
##                                  3rd Qu.: 75.25   3rd Qu.:              -1  
##                                  Max.   :100.00   Max.   :               0  
##     beta1hat                   beta2hat               
##  Min.   :              -7   Min.   :-679295575724000  
##  1st Qu.:               2   1st Qu.:              -1  
##  Median :               2   Median :              -1  
##  Mean   :    382863792365   Mean   :   -188693215481  
##  3rd Qu.:               2   3rd Qu.:              -1  
##  Max.   :1378309652500000   Max.   :              10  
##     beta3hat              
##  Min.   :            -29  
##  1st Qu.:              0  
##  Median :              1  
##  Mean   :   187552667848  
##  3rd Qu.:              1  
##  Max.   :675189604250000

16 Menyimpan Hasil Mentah

write.csv(all_results,
          "all_results.csv",
          row.names = FALSE)

17 Jumlah Replikasi per Skenario

all_results %>%
  count(scenario, method) %>%
  kable(caption =
          "Jumlah Replikasi per Skenario")
Jumlah Replikasi per Skenario
scenario method n
N1000_M0_R0 Listwise Deletion 100
N1000_M0_R0 Expectation-Maximization 100
N1000_M0_R95 Listwise Deletion 100
N1000_M0_R95 Expectation-Maximization 100
N1000_M10_R0 Listwise Deletion 100
N1000_M10_R0 Expectation-Maximization 100
N1000_M10_R95 Listwise Deletion 100
N1000_M10_R95 Expectation-Maximization 100
N1000_M20_R0 Listwise Deletion 100
N1000_M20_R0 Expectation-Maximization 100
N1000_M20_R95 Listwise Deletion 100
N1000_M20_R95 Expectation-Maximization 100
N200_M0_R0 Listwise Deletion 100
N200_M0_R0 Expectation-Maximization 100
N200_M0_R95 Listwise Deletion 100
N200_M0_R95 Expectation-Maximization 100
N200_M10_R0 Listwise Deletion 100
N200_M10_R0 Expectation-Maximization 100
N200_M10_R95 Listwise Deletion 100
N200_M10_R95 Expectation-Maximization 100
N200_M20_R0 Listwise Deletion 100
N200_M20_R0 Expectation-Maximization 100
N200_M20_R95 Listwise Deletion 100
N200_M20_R95 Expectation-Maximization 100
N50_M0_R0 Listwise Deletion 100
N50_M0_R0 Expectation-Maximization 100
N50_M0_R95 Listwise Deletion 100
N50_M0_R95 Expectation-Maximization 100
N50_M10_R0 Listwise Deletion 100
N50_M10_R0 Expectation-Maximization 100
N50_M10_R95 Listwise Deletion 100
N50_M10_R95 Expectation-Maximization 100
N50_M20_R0 Listwise Deletion 100
N50_M20_R0 Expectation-Maximization 100
N50_M20_R95 Listwise Deletion 100
N50_M20_R95 Expectation-Maximization 100

18 Jumlah Missing Value per Proporsi

design %>%
  distinct(n, missing) %>%
  mutate(
    Missing_Obs = round(n*missing)
  ) %>%
  arrange(n, missing) %>%
  kable(
    caption =
      "Jumlah Missing Value yang Dibangkitkan"
  )
Jumlah Missing Value yang Dibangkitkan
n missing Missing_Obs
50 0.0 0
50 0.1 5
50 0.2 10
200 0.0 0
200 0.1 20
200 0.2 40
1000 0.0 0
1000 0.1 100
1000 0.2 200

19 Mean Estimate

summary_results <- all_results %>%
  group_by(scenario, n, missing, rho, method) %>%
  summarise(
    Mean_B0 = mean(beta0hat, na.rm = TRUE),
    Mean_B1 = mean(beta1hat, na.rm = TRUE),
    Mean_B2 = mean(beta2hat, na.rm = TRUE),
    Mean_B3 = mean(beta3hat, na.rm = TRUE),
    .groups = "drop"
  )

20 Bias

summary_results <- summary_results %>%
  mutate(
    Bias_B0 = Mean_B0 - beta_true["beta0"],
    Bias_B1 = Mean_B1 - beta_true["beta1"],
    Bias_B2 = Mean_B2 - beta_true["beta2"],
    Bias_B3 = Mean_B3 - beta_true["beta3"]
  )

21 RMSE

rmse_results <- all_results %>%
  group_by(scenario, n, missing, rho, method) %>%
  summarise(
    RMSE_B0 = sqrt(mean((beta0hat - beta_true["beta0"])^2, na.rm = TRUE)),
    RMSE_B1 = sqrt(mean((beta1hat - beta_true["beta1"])^2, na.rm = TRUE)),
    RMSE_B2 = sqrt(mean((beta2hat - beta_true["beta2"])^2, na.rm = TRUE)),
    RMSE_B3 = sqrt(mean((beta3hat - beta_true["beta3"])^2, na.rm = TRUE)),
    .groups = "drop"
  )

22 Menggabungkan Bias dan RMSE

summary_results <- left_join(
  summary_results,
  rmse_results,
  by = c(
    "scenario",
    "n",
    "missing",
    "rho",
    "method"
  )
) %>%
  mutate(
    across(
      where(is.numeric),
      ~ round(.x, 4)
    )
  )

23 Tabel Ringkasan Hasil

kable(
  summary_results,
  digits = 4,
  caption = "Mean Estimate, Bias, dan RMSE"
)
Mean Estimate, Bias, dan RMSE
scenario n missing rho method Mean_B0 Mean_B1 Mean_B2 Mean_B3 Bias_B0 Bias_B1 Bias_B2 Bias_B3 RMSE_B0 RMSE_B1 RMSE_B2 RMSE_B3
N1000_M0_R0 1000 0.0 0.00 Listwise Deletion -1.5032 2.0175 -1.0125 0.4996 -0.0032 0.0175 -0.0125 -0.0004 0.1211 0.1377 0.1137 0.1015
N1000_M0_R0 1000 0.0 0.00 Expectation-Maximization -1.5021 2.0213 -0.9870 0.5015 -0.0021 0.0213 0.0130 0.0015 0.1091 0.1398 0.1030 0.0978
N1000_M0_R95 1000 0.0 0.95 Listwise Deletion -1.5180 2.0226 -0.9973 0.5117 -0.0180 0.0226 0.0027 0.0117 0.1002 0.3034 0.2797 0.0935
N1000_M0_R95 1000 0.0 0.95 Expectation-Maximization -1.5055 1.9878 -0.9792 0.4919 -0.0055 -0.0122 0.0208 -0.0081 0.0955 0.3190 0.2892 0.0857
N1000_M10_R0 1000 0.1 0.00 Listwise Deletion -1.5078 2.0278 -1.0127 0.4838 -0.0078 0.0278 -0.0127 -0.0162 0.1275 0.1660 0.1136 0.1063
N1000_M10_R0 1000 0.1 0.00 Expectation-Maximization -1.4875 1.9815 -0.9757 0.4928 0.0125 -0.0185 0.0243 -0.0072 0.1434 0.1444 0.1212 0.1041
N1000_M10_R95 1000 0.1 0.95 Listwise Deletion -1.5162 2.0336 -1.0205 0.4892 -0.0162 0.0336 -0.0205 -0.0108 0.1086 0.3506 0.3177 0.0943
N1000_M10_R95 1000 0.1 0.95 Expectation-Maximization -1.4970 2.0034 -0.9974 0.4942 0.0030 0.0034 0.0026 -0.0058 0.1014 0.3541 0.3304 0.0766
N1000_M20_R0 1000 0.2 0.00 Listwise Deletion -1.5028 2.0060 -0.9950 0.5035 -0.0028 0.0060 0.0050 0.0035 0.1201 0.1665 0.1396 0.1104
N1000_M20_R0 1000 0.2 0.00 Expectation-Maximization -1.4765 1.9838 -0.9913 0.4951 0.0235 -0.0162 0.0087 -0.0049 0.1203 0.1626 0.1293 0.1248
N1000_M20_R95 1000 0.2 0.95 Listwise Deletion -1.5225 2.0569 -1.0569 0.5170 -0.0225 0.0569 -0.0569 0.0170 0.1296 0.3672 0.3366 0.1185
N1000_M20_R95 1000 0.2 0.95 Expectation-Maximization -1.5110 2.0108 -1.0124 0.5027 -0.0110 0.0108 -0.0124 0.0027 0.0915 0.4305 0.4108 0.0930
N200_M0_R0 200 0.0 0.00 Listwise Deletion -1.4980 2.0200 -1.0090 0.4931 0.0020 0.0200 -0.0090 -0.0069 0.2628 0.3127 0.2205 0.2346
N200_M0_R0 200 0.0 0.00 Expectation-Maximization -1.5242 2.0451 -1.0283 0.5209 -0.0242 0.0451 -0.0283 0.0209 0.3135 0.3690 0.2491 0.2890
N200_M0_R95 200 0.0 0.95 Listwise Deletion -1.5850 2.1087 -1.0752 0.4753 -0.0850 0.1087 -0.0752 -0.0247 0.2496 0.6889 0.6572 0.1893
N200_M0_R95 200 0.0 0.95 Expectation-Maximization -1.5474 2.0869 -1.0712 0.5435 -0.0474 0.0869 -0.0712 0.0435 0.2361 0.6675 0.6217 0.2148
N200_M10_R0 200 0.1 0.00 Listwise Deletion -1.5724 2.1468 -1.0374 0.5253 -0.0724 0.1468 -0.0374 0.0253 0.3192 0.4706 0.3097 0.2930
N200_M10_R0 200 0.1 0.00 Expectation-Maximization -1.5302 2.0597 -0.9954 0.5152 -0.0302 0.0597 0.0046 0.0152 0.2768 0.3089 0.3041 0.2761
N200_M10_R95 200 0.1 0.95 Listwise Deletion -1.5736 2.0860 -1.0408 0.5233 -0.0736 0.0860 -0.0408 0.0233 0.2525 0.8098 0.7098 0.2501
N200_M10_R95 200 0.1 0.95 Expectation-Maximization -1.5550 1.9691 -1.0081 0.5382 -0.0550 -0.0309 -0.0081 0.0382 0.2552 0.7842 0.7657 0.2209
N200_M20_R0 200 0.2 0.00 Listwise Deletion -1.5440 2.0466 -1.0007 0.5216 -0.0440 0.0466 -0.0007 0.0216 0.3597 0.4269 0.2975 0.3560
N200_M20_R0 200 0.2 0.00 Expectation-Maximization -1.5067 2.0664 -1.0390 0.5219 -0.0067 0.0664 -0.0390 0.0219 0.2967 0.4336 0.3259 0.2224
N200_M20_R95 200 0.2 0.95 Listwise Deletion -1.5915 2.3596 -1.2573 0.5550 -0.0915 0.3596 -0.2573 0.0550 0.3540 0.9277 0.8679 0.2960
N200_M20_R95 200 0.2 0.95 Expectation-Maximization -1.5643 2.0792 -1.0165 0.5505 -0.0643 0.0792 -0.0165 0.0505 0.2936 1.0723 0.9625 0.2418
N50_M0_R0 50 0.0 0.00 Listwise Deletion -8921656399256.8730 13783096525028.1719 -6792955757239.3115 6751896042500.7090 -8921656399255.3730 13783096525026.1719 -6792955757238.3115 6751896042500.2090 89216563992506.1719 137830965250204.9688 67929557572358.7891 67518960424989.7734
N50_M0_R0 50 0.0 0.00 Expectation-Maximization -1.9516 2.7288 -1.3012 0.5796 -0.4516 0.7288 -0.3012 0.0796 1.1187 1.8790 0.8318 0.5330
N50_M0_R95 50 0.0 0.95 Listwise Deletion -1.7187 2.3964 -1.1983 0.6379 -0.2187 0.3964 -0.1983 0.1379 0.5700 1.6725 1.4311 0.5338
N50_M0_R95 50 0.0 0.95 Expectation-Maximization -1.7280 2.4643 -1.2907 0.4772 -0.2280 0.4643 -0.2907 -0.0228 0.5498 1.6919 1.6496 0.5113
N50_M10_R0 50 0.1 0.00 Listwise Deletion -3.7652 5.0681 -3.4512 0.8854 -2.2652 3.0681 -2.4512 0.3854 17.0685 24.1140 20.0645 2.6432
N50_M10_R0 50 0.1 0.00 Expectation-Maximization -10.5024 10.1954 -10.2375 1.9771 -9.0024 8.1954 -9.2375 1.4771 80.2497 68.8023 83.4366 10.1298
N50_M10_R95 50 0.1 0.95 Listwise Deletion -1.7707 1.9063 -0.6668 0.6593 -0.2707 -0.0937 0.3332 0.1593 0.7302 2.1027 2.0449 0.6105
N50_M10_R95 50 0.1 0.95 Expectation-Maximization -1.7794 2.7771 -1.5997 0.6769 -0.2794 0.7771 -0.5997 0.1769 0.8122 3.0857 2.7531 0.6210
N50_M20_R0 50 0.2 0.00 Listwise Deletion -18.3777 24.3025 -9.1396 11.8350 -16.8777 22.3025 -8.1396 11.3350 93.3333 117.6493 46.0217 85.8005
N50_M20_R0 50 0.2 0.00 Expectation-Maximization -5.8328 11.0430 -9.2932 4.7475 -4.3328 9.0430 -8.2932 4.2475 38.1907 83.9585 79.5541 40.4783
N50_M20_R95 50 0.2 0.95 Listwise Deletion -3.4211 4.2490 -2.2189 2.1655 -1.9211 2.2490 -1.2189 1.6655 15.4611 15.8665 9.1446 14.3885
N50_M20_R95 50 0.2 0.95 Expectation-Maximization -1.8694 2.3390 -1.1760 0.5405 -0.3694 0.3390 -0.1760 0.0405 0.7820 2.8619 2.7253 0.6399

24 Menyimpan Ringkasan

write.csv(
  summary_results,
  "tables/tab2_summary_results.csv",
  row.names = FALSE
)

25 Total Bias Absolut

summary_results <- summary_results %>%
  mutate(
    Total_Abs_Bias =
      abs(Bias_B0) +
      abs(Bias_B1) +
      abs(Bias_B2) +
      abs(Bias_B3)
  )

26 Total RMSE

summary_results <- summary_results %>%
  mutate(
    Total_RMSE =
      RMSE_B0 +
      RMSE_B1 +
      RMSE_B2 +
      RMSE_B3
  )

27 Total Error

summary_results <- summary_results %>%
  mutate(
    Total_Error =
      Total_Abs_Bias +
      Total_RMSE
  )

28 Analisis Faktor Simulasi (ANOVA)

anova_data <- summary_results %>%
  select(
    n,
    missing,
    rho,
    method,
    Total_Error
  )

anova_model <- lm(
  Total_Error ~
    factor(n) *
    factor(missing) *
    factor(rho) *
    factor(method),
  data = anova_data
)

anova_results <- anova(anova_model)

anova_results
## Analysis of Variance Table
## 
## Response: Total_Error
##                                                      Df
## factor(n)                                             2
## factor(missing)                                       2
## factor(rho)                                           1
## factor(method)                                        1
## factor(n):factor(missing)                             4
## factor(n):factor(rho)                                 2
## factor(missing):factor(rho)                           2
## factor(n):factor(method)                              2
## factor(missing):factor(method)                        2
## factor(rho):factor(method)                            1
## factor(n):factor(missing):factor(rho)                 4
## factor(n):factor(missing):factor(method)              4
## factor(n):factor(rho):factor(method)                  2
## factor(missing):factor(rho):factor(method)            2
## factor(n):factor(missing):factor(rho):factor(method)  4
## Residuals                                             0
##                                                                             Sum Sq
## factor(n)                                             8833227497840815935068068642
## factor(missing)                                       8833227497768487862660844280
## factor(rho)                                           4416613748916314486086888488
## factor(method)                                        4416613748895659610448646660
## factor(n):factor(missing)                            17666454995537250601660882460
## factor(n):factor(rho)                                 8833227497833239201880428046
## factor(missing):factor(rho)                           8833227497771407065868008060
## factor(n):factor(method)                              8833227497791246653808446664
## factor(missing):factor(method)                        8833227497792170242460200604
## factor(rho):factor(method)                            4416613748893374825286260862
## factor(n):factor(missing):factor(rho)                17666454995542941674662466286
## factor(n):factor(missing):factor(method)             17666454995584417451228800228
## factor(n):factor(rho):factor(method)                  8833227497786750750686806440
## factor(missing):factor(rho):factor(method)            8833227497794590268024608820
## factor(n):factor(missing):factor(rho):factor(method) 17666454995588852881648686482
## Residuals                                                                        0
##                                                                           Mean Sq
## factor(n)                                            4416613748920407967084084826
## factor(missing)                                      4416613748884243931880422640
## factor(rho)                                          4416613748916314486086888488
## factor(method)                                       4416613748895659610448646660
## factor(n):factor(missing)                            4416613748884312650440228640
## factor(n):factor(rho)                                4416613748916619600440264028
## factor(missing):factor(rho)                          4416613748885703532484004080
## factor(n):factor(method)                             4416613748895623326404228882
## factor(missing):factor(method)                       4416613748896085121280600802
## factor(rho):factor(method)                           4416613748893374825286260862
## factor(n):factor(missing):factor(rho)                4416613748885735418448644824
## factor(n):factor(missing):factor(method)             4416613748896104362882200882
## factor(n):factor(rho):factor(method)                 4416613748893375375848408220
## factor(missing):factor(rho):factor(method)           4416613748897295134062804460
## factor(n):factor(missing):factor(rho):factor(method) 4416613748897213220462424628
## Residuals                                                                     NaN
##                                                      F value Pr(>F)
## factor(n)                                                NaN    NaN
## factor(missing)                                          NaN    NaN
## factor(rho)                                              NaN    NaN
## factor(method)                                           NaN    NaN
## factor(n):factor(missing)                                NaN    NaN
## factor(n):factor(rho)                                    NaN    NaN
## factor(missing):factor(rho)                              NaN    NaN
## factor(n):factor(method)                                 NaN    NaN
## factor(missing):factor(method)                           NaN    NaN
## factor(rho):factor(method)                               NaN    NaN
## factor(n):factor(missing):factor(rho)                    NaN    NaN
## factor(n):factor(missing):factor(method)                 NaN    NaN
## factor(n):factor(rho):factor(method)                     NaN    NaN
## factor(missing):factor(rho):factor(method)               NaN    NaN
## factor(n):factor(missing):factor(rho):factor(method)     NaN    NaN
## Residuals
write.csv(
  as.data.frame(anova_results),
  "tables/tab9_anova.csv"
)

29 Ranking Skenario

ranking_df <- summary_results %>%
  arrange(Total_Error) %>%
  mutate(Ranking = row_number()) %>%
  mutate(
    across(
      where(is.numeric),
      ~ round(.x, 4)
    )
  )

30 Tabel Ranking

kable(
  ranking_df,
  digits = 4,
  caption =
    "Ranking Skenario Berdasarkan Total Error"
)
Ranking Skenario Berdasarkan Total Error
scenario n missing rho method Mean_B0 Mean_B1 Mean_B2 Mean_B3 Bias_B0 Bias_B1 Bias_B2 Bias_B3 RMSE_B0 RMSE_B1 RMSE_B2 RMSE_B3 Total_Abs_Bias Total_RMSE Total_Error Ranking
N1000_M0_R0 1000 0.0 0.00 Expectation-Maximization -1.5021 2.0213 -0.9870 0.5015 -0.0021 0.0213 0.0130 0.0015 0.1091 0.1398 0.1030 0.0978 0.0379 0.4497 0.4876 1
N1000_M0_R0 1000 0.0 0.00 Listwise Deletion -1.5032 2.0175 -1.0125 0.4996 -0.0032 0.0175 -0.0125 -0.0004 0.1211 0.1377 0.1137 0.1015 0.0336 0.4740 0.5076 2
N1000_M20_R0 1000 0.2 0.00 Listwise Deletion -1.5028 2.0060 -0.9950 0.5035 -0.0028 0.0060 0.0050 0.0035 0.1201 0.1665 0.1396 0.1104 0.0173 0.5366 0.5539 3
N1000_M10_R0 1000 0.1 0.00 Expectation-Maximization -1.4875 1.9815 -0.9757 0.4928 0.0125 -0.0185 0.0243 -0.0072 0.1434 0.1444 0.1212 0.1041 0.0625 0.5131 0.5756 4
N1000_M10_R0 1000 0.1 0.00 Listwise Deletion -1.5078 2.0278 -1.0127 0.4838 -0.0078 0.0278 -0.0127 -0.0162 0.1275 0.1660 0.1136 0.1063 0.0645 0.5134 0.5779 5
N1000_M20_R0 1000 0.2 0.00 Expectation-Maximization -1.4765 1.9838 -0.9913 0.4951 0.0235 -0.0162 0.0087 -0.0049 0.1203 0.1626 0.1293 0.1248 0.0533 0.5370 0.5903 6
N1000_M0_R95 1000 0.0 0.95 Listwise Deletion -1.5180 2.0226 -0.9973 0.5117 -0.0180 0.0226 0.0027 0.0117 0.1002 0.3034 0.2797 0.0935 0.0550 0.7768 0.8318 7
N1000_M0_R95 1000 0.0 0.95 Expectation-Maximization -1.5055 1.9878 -0.9792 0.4919 -0.0055 -0.0122 0.0208 -0.0081 0.0955 0.3190 0.2892 0.0857 0.0466 0.7894 0.8360 8
N1000_M10_R95 1000 0.1 0.95 Expectation-Maximization -1.4970 2.0034 -0.9974 0.4942 0.0030 0.0034 0.0026 -0.0058 0.1014 0.3541 0.3304 0.0766 0.0148 0.8625 0.8773 9
N1000_M10_R95 1000 0.1 0.95 Listwise Deletion -1.5162 2.0336 -1.0205 0.4892 -0.0162 0.0336 -0.0205 -0.0108 0.1086 0.3506 0.3177 0.0943 0.0811 0.8712 0.9523 10
N1000_M20_R95 1000 0.2 0.95 Expectation-Maximization -1.5110 2.0108 -1.0124 0.5027 -0.0110 0.0108 -0.0124 0.0027 0.0915 0.4305 0.4108 0.0930 0.0369 1.0258 1.0627 11
N200_M0_R0 200 0.0 0.00 Listwise Deletion -1.4980 2.0200 -1.0090 0.4931 0.0020 0.0200 -0.0090 -0.0069 0.2628 0.3127 0.2205 0.2346 0.0379 1.0306 1.0685 12
N1000_M20_R95 1000 0.2 0.95 Listwise Deletion -1.5225 2.0569 -1.0569 0.5170 -0.0225 0.0569 -0.0569 0.0170 0.1296 0.3672 0.3366 0.1185 0.1533 0.9519 1.1052 13
N200_M10_R0 200 0.1 0.00 Expectation-Maximization -1.5302 2.0597 -0.9954 0.5152 -0.0302 0.0597 0.0046 0.0152 0.2768 0.3089 0.3041 0.2761 0.1097 1.1659 1.2756 14
N200_M0_R0 200 0.0 0.00 Expectation-Maximization -1.5242 2.0451 -1.0283 0.5209 -0.0242 0.0451 -0.0283 0.0209 0.3135 0.3690 0.2491 0.2890 0.1185 1.2206 1.3391 15
N200_M20_R0 200 0.2 0.00 Expectation-Maximization -1.5067 2.0664 -1.0390 0.5219 -0.0067 0.0664 -0.0390 0.0219 0.2967 0.4336 0.3259 0.2224 0.1340 1.2786 1.4126 16
N200_M20_R0 200 0.2 0.00 Listwise Deletion -1.5440 2.0466 -1.0007 0.5216 -0.0440 0.0466 -0.0007 0.0216 0.3597 0.4269 0.2975 0.3560 0.1129 1.4401 1.5530 17
N200_M10_R0 200 0.1 0.00 Listwise Deletion -1.5724 2.1468 -1.0374 0.5253 -0.0724 0.1468 -0.0374 0.0253 0.3192 0.4706 0.3097 0.2930 0.2819 1.3925 1.6744 18
N200_M0_R95 200 0.0 0.95 Expectation-Maximization -1.5474 2.0869 -1.0712 0.5435 -0.0474 0.0869 -0.0712 0.0435 0.2361 0.6675 0.6217 0.2148 0.2490 1.7401 1.9891 19
N200_M0_R95 200 0.0 0.95 Listwise Deletion -1.5850 2.1087 -1.0752 0.4753 -0.0850 0.1087 -0.0752 -0.0247 0.2496 0.6889 0.6572 0.1893 0.2936 1.7850 2.0786 20
N200_M10_R95 200 0.1 0.95 Expectation-Maximization -1.5550 1.9691 -1.0081 0.5382 -0.0550 -0.0309 -0.0081 0.0382 0.2552 0.7842 0.7657 0.2209 0.1322 2.0260 2.1582 21
N200_M10_R95 200 0.1 0.95 Listwise Deletion -1.5736 2.0860 -1.0408 0.5233 -0.0736 0.0860 -0.0408 0.0233 0.2525 0.8098 0.7098 0.2501 0.2237 2.0222 2.2459 22
N200_M20_R95 200 0.2 0.95 Expectation-Maximization -1.5643 2.0792 -1.0165 0.5505 -0.0643 0.0792 -0.0165 0.0505 0.2936 1.0723 0.9625 0.2418 0.2105 2.5702 2.7807 23
N200_M20_R95 200 0.2 0.95 Listwise Deletion -1.5915 2.3596 -1.2573 0.5550 -0.0915 0.3596 -0.2573 0.0550 0.3540 0.9277 0.8679 0.2960 0.7634 2.4456 3.2090 24
N50_M0_R95 50 0.0 0.95 Listwise Deletion -1.7187 2.3964 -1.1983 0.6379 -0.2187 0.3964 -0.1983 0.1379 0.5700 1.6725 1.4311 0.5338 0.9513 4.2074 5.1587 25
N50_M0_R95 50 0.0 0.95 Expectation-Maximization -1.7280 2.4643 -1.2907 0.4772 -0.2280 0.4643 -0.2907 -0.0228 0.5498 1.6919 1.6496 0.5113 1.0058 4.4026 5.4084 26
N50_M0_R0 50 0.0 0.00 Expectation-Maximization -1.9516 2.7288 -1.3012 0.5796 -0.4516 0.7288 -0.3012 0.0796 1.1187 1.8790 0.8318 0.5330 1.5612 4.3625 5.9237 27
N50_M10_R95 50 0.1 0.95 Listwise Deletion -1.7707 1.9063 -0.6668 0.6593 -0.2707 -0.0937 0.3332 0.1593 0.7302 2.1027 2.0449 0.6105 0.8569 5.4883 6.3452 28
N50_M20_R95 50 0.2 0.95 Expectation-Maximization -1.8694 2.3390 -1.1760 0.5405 -0.3694 0.3390 -0.1760 0.0405 0.7820 2.8619 2.7253 0.6399 0.9249 7.0091 7.9340 29
N50_M10_R95 50 0.1 0.95 Expectation-Maximization -1.7794 2.7771 -1.5997 0.6769 -0.2794 0.7771 -0.5997 0.1769 0.8122 3.0857 2.7531 0.6210 1.8331 7.2720 9.1051 30
N50_M20_R95 50 0.2 0.95 Listwise Deletion -3.4211 4.2490 -2.2189 2.1655 -1.9211 2.2490 -1.2189 1.6655 15.4611 15.8665 9.1446 14.3885 7.0545 54.8607 61.9152 31
N50_M10_R0 50 0.1 0.00 Listwise Deletion -3.7652 5.0681 -3.4512 0.8854 -2.2652 3.0681 -2.4512 0.3854 17.0685 24.1140 20.0645 2.6432 8.1699 63.8902 72.0601 32
N50_M20_R0 50 0.2 0.00 Expectation-Maximization -5.8328 11.0430 -9.2932 4.7475 -4.3328 9.0430 -8.2932 4.2475 38.1907 83.9585 79.5541 40.4783 25.9165 242.1816 268.0981 33
N50_M10_R0 50 0.1 0.00 Expectation-Maximization -10.5024 10.1954 -10.2375 1.9771 -9.0024 8.1954 -9.2375 1.4771 80.2497 68.8023 83.4366 10.1298 27.9124 242.6184 270.5308 34
N50_M20_R0 50 0.2 0.00 Listwise Deletion -18.3777 24.3025 -9.1396 11.8350 -16.8777 22.3025 -8.1396 11.3350 93.3333 117.6493 46.0217 85.8005 58.6548 342.8048 401.4596 35
N50_M0_R0 50 0.0 0.00 Listwise Deletion -8921656399256.8730 13783096525028.1719 -6792955757239.3115 6751896042500.7090 -8921656399255.3730 13783096525026.1719 -6792955757238.3115 6751896042500.2090 89216563992506.1719 137830965250204.9688 67929557572358.7891 67518960424989.7734 36249604724020.0703 362496047240059.6875 398745651964079.7500 36

31 Menyimpan Ranking

write.csv(
  ranking_df,
  "tables/tab3_ranking.csv",
  row.names = FALSE
)

32 10 Skenario Terbaik

ranking_df %>%
  slice_head(n = 10) %>%
  kable(
    digits = 4,
    caption = "10 Skenario Terbaik"
  )
10 Skenario Terbaik
scenario n missing rho method Mean_B0 Mean_B1 Mean_B2 Mean_B3 Bias_B0 Bias_B1 Bias_B2 Bias_B3 RMSE_B0 RMSE_B1 RMSE_B2 RMSE_B3 Total_Abs_Bias Total_RMSE Total_Error Ranking
N1000_M0_R0 1000 0.0 0.00 Expectation-Maximization -1.5021 2.0213 -0.9870 0.5015 -0.0021 0.0213 0.0130 0.0015 0.1091 0.1398 0.1030 0.0978 0.0379 0.4497 0.4876 1
N1000_M0_R0 1000 0.0 0.00 Listwise Deletion -1.5032 2.0175 -1.0125 0.4996 -0.0032 0.0175 -0.0125 -0.0004 0.1211 0.1377 0.1137 0.1015 0.0336 0.4740 0.5076 2
N1000_M20_R0 1000 0.2 0.00 Listwise Deletion -1.5028 2.0060 -0.9950 0.5035 -0.0028 0.0060 0.0050 0.0035 0.1201 0.1665 0.1396 0.1104 0.0173 0.5366 0.5539 3
N1000_M10_R0 1000 0.1 0.00 Expectation-Maximization -1.4875 1.9815 -0.9757 0.4928 0.0125 -0.0185 0.0243 -0.0072 0.1434 0.1444 0.1212 0.1041 0.0625 0.5131 0.5756 4
N1000_M10_R0 1000 0.1 0.00 Listwise Deletion -1.5078 2.0278 -1.0127 0.4838 -0.0078 0.0278 -0.0127 -0.0162 0.1275 0.1660 0.1136 0.1063 0.0645 0.5134 0.5779 5
N1000_M20_R0 1000 0.2 0.00 Expectation-Maximization -1.4765 1.9838 -0.9913 0.4951 0.0235 -0.0162 0.0087 -0.0049 0.1203 0.1626 0.1293 0.1248 0.0533 0.5370 0.5903 6
N1000_M0_R95 1000 0.0 0.95 Listwise Deletion -1.5180 2.0226 -0.9973 0.5117 -0.0180 0.0226 0.0027 0.0117 0.1002 0.3034 0.2797 0.0935 0.0550 0.7768 0.8318 7
N1000_M0_R95 1000 0.0 0.95 Expectation-Maximization -1.5055 1.9878 -0.9792 0.4919 -0.0055 -0.0122 0.0208 -0.0081 0.0955 0.3190 0.2892 0.0857 0.0466 0.7894 0.8360 8
N1000_M10_R95 1000 0.1 0.95 Expectation-Maximization -1.4970 2.0034 -0.9974 0.4942 0.0030 0.0034 0.0026 -0.0058 0.1014 0.3541 0.3304 0.0766 0.0148 0.8625 0.8773 9
N1000_M10_R95 1000 0.1 0.95 Listwise Deletion -1.5162 2.0336 -1.0205 0.4892 -0.0162 0.0336 -0.0205 -0.0108 0.1086 0.3506 0.3177 0.0943 0.0811 0.8712 0.9523 10
write.csv(
  ranking_df %>% slice_head(n = 10),
  "tables/tab4_top10.csv",
  row.names = FALSE
)

33 10 Skenario Terburuk

ranking_df %>%
  slice_tail(n = 10) %>%
  kable(
    digits = 4,
    caption = "10 Skenario Terburuk"
  )
10 Skenario Terburuk
scenario n missing rho method Mean_B0 Mean_B1 Mean_B2 Mean_B3 Bias_B0 Bias_B1 Bias_B2 Bias_B3 RMSE_B0 RMSE_B1 RMSE_B2 RMSE_B3 Total_Abs_Bias Total_RMSE Total_Error Ranking
N50_M0_R0 50 0.0 0.00 Expectation-Maximization -1.9516 2.7288 -1.3012 0.5796 -0.4516 0.7288 -0.3012 0.0796 1.1187 1.8790 0.8318 0.5330 1.5612 4.3625 5.9237 27
N50_M10_R95 50 0.1 0.95 Listwise Deletion -1.7707 1.9063 -0.6668 0.6593 -0.2707 -0.0937 0.3332 0.1593 0.7302 2.1027 2.0449 0.6105 0.8569 5.4883 6.3452 28
N50_M20_R95 50 0.2 0.95 Expectation-Maximization -1.8694 2.3390 -1.1760 0.5405 -0.3694 0.3390 -0.1760 0.0405 0.7820 2.8619 2.7253 0.6399 0.9249 7.0091 7.9340 29
N50_M10_R95 50 0.1 0.95 Expectation-Maximization -1.7794 2.7771 -1.5997 0.6769 -0.2794 0.7771 -0.5997 0.1769 0.8122 3.0857 2.7531 0.6210 1.8331 7.2720 9.1051 30
N50_M20_R95 50 0.2 0.95 Listwise Deletion -3.4211 4.2490 -2.2189 2.1655 -1.9211 2.2490 -1.2189 1.6655 15.4611 15.8665 9.1446 14.3885 7.0545 54.8607 61.9152 31
N50_M10_R0 50 0.1 0.00 Listwise Deletion -3.7652 5.0681 -3.4512 0.8854 -2.2652 3.0681 -2.4512 0.3854 17.0685 24.1140 20.0645 2.6432 8.1699 63.8902 72.0601 32
N50_M20_R0 50 0.2 0.00 Expectation-Maximization -5.8328 11.0430 -9.2932 4.7475 -4.3328 9.0430 -8.2932 4.2475 38.1907 83.9585 79.5541 40.4783 25.9165 242.1816 268.0981 33
N50_M10_R0 50 0.1 0.00 Expectation-Maximization -10.5024 10.1954 -10.2375 1.9771 -9.0024 8.1954 -9.2375 1.4771 80.2497 68.8023 83.4366 10.1298 27.9124 242.6184 270.5308 34
N50_M20_R0 50 0.2 0.00 Listwise Deletion -18.3777 24.3025 -9.1396 11.8350 -16.8777 22.3025 -8.1396 11.3350 93.3333 117.6493 46.0217 85.8005 58.6548 342.8048 401.4596 35
N50_M0_R0 50 0.0 0.00 Listwise Deletion -8921656399256.8730 13783096525028.1719 -6792955757239.3115 6751896042500.7090 -8921656399255.3730 13783096525026.1719 -6792955757238.3115 6751896042500.2090 89216563992506.1719 137830965250204.9688 67929557572358.7891 67518960424989.7734 36249604724020.0703 362496047240059.6875 398745651964079.7500 36
write.csv(
  ranking_df %>% slice_tail(n = 10),
  "tables/tab5_bottom10.csv",
  row.names = FALSE
)

34 Ringkasan Berdasarkan Metode

method_summary <- ranking_df %>%
  group_by(method) %>%
  summarise(
    Mean_Total_Error = mean(Total_Error),
    Median_Total_Error = median(Total_Error),
    Min_Total_Error = min(Total_Error),
    Max_Total_Error = max(Total_Error),
    .groups = "drop"
  )

kable(
  method_summary,
  digits = 4,
  caption =
    "Ringkasan Total Error Berdasarkan Metode"
)
Ringkasan Total Error Berdasarkan Metode
method Mean_Total_Error Median_Total_Error Min_Total_Error Max_Total_Error
Listwise Deletion 22152536220257.9453 1.8765 0.5076 398745651964079.7500
Expectation-Maximization 32.3547 1.7008 0.4876 270.5308
write.csv(
  method_summary,
  "tables/tab6_method_summary.csv",
  row.names = FALSE
)

35 Error Estimasi Parameter

error_results <- all_results %>%
  mutate(
    Error_B0 = beta0hat - beta_true["beta0"],
    Error_B1 = beta1hat - beta_true["beta1"],
    Error_B2 = beta2hat - beta_true["beta2"],
    Error_B3 = beta3hat - beta_true["beta3"]
  )

36 Format Long

error_long <- error_results %>%
  select(
    scenario,
    n,
    missing,
    rho,
    method,
    starts_with("Error_")
  ) %>%
  pivot_longer(
    cols = starts_with("Error_"),
    names_to = "Parameter",
    values_to = "Error"
  )

37 Nama Parameter

error_long$Parameter <- recode(
  error_long$Parameter,
  Error_B0 = "β0",
  Error_B1 = "β1",
  Error_B2 = "β2",
  Error_B3 = "β3"
)
error_long_plot <- error_long %>%
  filter(
    !(
      scenario == "N50_M0_R0" &
      method == "Listwise Deletion" &
      abs(Error) > 100
    )
  )

38 Pemeriksaan Data Error

head(error_long)
## # A tibble: 6 × 7
##   scenario      n missing   rho method            Parameter  Error
##   <chr>     <dbl>   <dbl> <dbl> <fct>             <chr>      <dbl>
## 1 N50_M0_R0    50       0     0 Listwise Deletion β0         1.08 
## 2 N50_M0_R0    50       0     0 Listwise Deletion β1         6.75 
## 3 N50_M0_R0    50       0     0 Listwise Deletion β2        -1.08 
## 4 N50_M0_R0    50       0     0 Listwise Deletion β3        -0.248
## 5 N50_M0_R0    50       0     0 Listwise Deletion β0         0.110
## 6 N50_M0_R0    50       0     0 Listwise Deletion β1         0.917

39 Observasi Error Ekstrem

extreme_errors <- error_long %>%
  filter(abs(Error) > 100) %>%
  arrange(desc(abs(Error)))

kable(
  extreme_errors,
  digits = 4,
  caption = "Observasi Error Ekstrem"
)
Observasi Error Ekstrem
scenario n missing rho method Parameter Error
N50_M0_R0 50 0.0 0.00 Listwise Deletion β1 1378309652502049.7500
N50_M0_R0 50 0.0 0.00 Listwise Deletion β0 -892165639925061.7500
N50_M0_R0 50 0.0 0.00 Listwise Deletion β2 -679295575723587.8750
N50_M0_R0 50 0.0 0.00 Listwise Deletion β3 675189604249897.7500
N50_M20_R0 50 0.2 0.00 Listwise Deletion β1 918.1074
N50_M20_R0 50 0.2 0.00 Expectation-Maximization β1 839.4485
N50_M10_R0 50 0.1 0.00 Expectation-Maximization β2 -832.2075
N50_M20_R0 50 0.2 0.00 Listwise Deletion β3 826.5212
N50_M10_R0 50 0.1 0.00 Expectation-Maximization β0 -798.4751
N50_M20_R0 50 0.2 0.00 Expectation-Maximization β2 -795.4612
N50_M20_R0 50 0.2 0.00 Listwise Deletion β0 -761.5975
N50_M10_R0 50 0.1 0.00 Expectation-Maximization β1 681.2411
N50_M20_R0 50 0.2 0.00 Listwise Deletion β1 598.9947
N50_M20_R0 50 0.2 0.00 Listwise Deletion β0 -448.7020
N50_M20_R0 50 0.2 0.00 Listwise Deletion β1 409.7842
N50_M0_R0 50 0.0 0.00 Listwise Deletion β1 408.8473
N50_M20_R0 50 0.2 0.00 Expectation-Maximization β3 404.6730
N50_M0_R0 50 0.0 0.00 Listwise Deletion β0 -383.4881
N50_M20_R0 50 0.2 0.00 Expectation-Maximization β0 -381.6583
N50_M20_R0 50 0.2 0.00 Listwise Deletion β2 -347.3918
N50_M20_R0 50 0.2 0.00 Listwise Deletion β0 -284.4444
N50_M20_R0 50 0.2 0.00 Listwise Deletion β2 -282.7119
N50_M10_R0 50 0.1 0.00 Listwise Deletion β1 240.2634
N50_M20_R0 50 0.2 0.00 Listwise Deletion β3 216.7945
N50_M10_R0 50 0.1 0.00 Listwise Deletion β2 -200.1207
N50_M0_R0 50 0.0 0.00 Listwise Deletion β2 -181.8760
N50_M10_R0 50 0.1 0.00 Listwise Deletion β0 -169.8985
N50_M20_R95 50 0.2 0.95 Listwise Deletion β1 156.3799
N50_M20_R95 50 0.2 0.95 Listwise Deletion β0 -154.2978
N50_M20_R95 50 0.2 0.95 Listwise Deletion β3 143.7439
write.csv(
  extreme_errors,
  "tables/tab8_extreme_errors.csv",
  row.names = FALSE
)

Tabel di atas menunjukkan observasi dengan error yang sangat besar. Nilai tersebut berasal dari fenomena complete separation pada regresi logistik, yang menyebabkan estimasi koefisien divergen ke nilai sangat besar.

40 Ringkasan Error

error_summary <- error_long %>%
  group_by(Parameter, method) %>%
  summarise(
    Mean_Error = mean(Error, na.rm = TRUE),
    SD_Error = sd(Error, na.rm = TRUE),
    .groups = "drop"
  )

kable(
  error_summary,
  digits = 4,
  caption = "Ringkasan Error Estimasi"
)
Ringkasan Error Estimasi
Parameter method Mean_Error SD_Error
β0 Listwise Deletion -495647577737.6312 21028545797754.8594
β0 Expectation-Maximization -0.8262 20.9417
β1 Listwise Deletion 765727584725.2792 32487070061969.0625
β1 Expectation-Maximization 1.1024 25.5980
β2 Listwise Deletion -377386430958.3612 16011150267472.2773
β2 Expectation-Maximization -1.0555 27.1813
β3 Listwise Deletion 375105335695.2217 15914371591725.4492
β3 Expectation-Maximization 0.3426 9.8368
write.csv(
  error_summary,
  "tables/tab7_error_summary.csv",
  row.names = FALSE
)
n_extreme <- error_long %>%
  filter(abs(Error) > 100) %>%
  nrow()

n_extreme
## [1] 30

Catatan:

Berdasarkan hasil simulasi, ditemukan 4 observasi dengan error ekstrem yang seluruhnya berasal dari skenario N50_M0_R0 pada metode Listwise Deletion. Nilai error yang sangat besar tersebut diduga muncul akibat fenomena complete separation pada regresi logistik biner yang menyebabkan estimasi parameter divergen ke nilai yang sangat besar.

Observasi tersebut tetap dipertahankan pada data hasil simulasi dan perhitungan numerik. Namun, untuk menjaga keterbacaan visualisasi, observasi ekstrem tersebut dikecualikan dari boxplot sehingga pola distribusi error pada mayoritas replikasi dapat diamati dengan lebih jelas.

41 Boxplot Error Seluruh Parameter

p1 <- ggplot(
  error_long_plot,
  aes(x = scenario,
      y = Error)
) +
  geom_boxplot() +
  geom_hline(
    yintercept = 0,
    linetype = "dashed"
  ) +
  facet_grid(
    Parameter ~ method,
    scales = "free_y"
  ) +
  labs(
    title = "Distribusi Error Estimasi Parameter",
    subtitle = expression(hat(beta)-beta),
    x = "Kombinasi Faktor",
    y = "Error"
  ) +
  theme_bw() +
  theme(
    axis.text.x =
      element_text(
        angle = 90,
        hjust = 1
      )
  )

print(p1)

ggsave(
  "figures/fig1_error_boxplot.png",
  p1,
  width = 14,
  height = 8,
  dpi = 300
)

42 Boxplot Berdasarkan Missing Value

p2 <- ggplot(
  error_long_plot,
  aes(
    x = factor(missing),
    y = Error,
    fill = factor(missing)
  )
) +
  geom_boxplot() +
  facet_grid(
    Parameter ~ method,
    scales = "free_y"
  ) +
  labs(
    title = "Pengaruh Missing Value",
    x = "Proporsi Missing",
    y = "Error",
    fill = "Missing"
  ) +
  theme_bw()

print(p2)

ggsave(
  "figures/fig2_missing_effect.png",
  p2,
  width = 12,
  height = 8,
  dpi = 300
)

43 Boxplot Berdasarkan Multikolinearitas

p3 <- ggplot(
  error_long_plot,
  aes(
    x = factor(rho),
    y = Error,
    fill = factor(rho)
  )
) +
  geom_boxplot() +
  facet_grid(
    Parameter ~ method,
    scales = "free_y"
  ) +
  labs(
    title = "Pengaruh Multikolinearitas",
    x = expression(rho),
    y = "Error",
    fill = expression(rho)
  ) +
  theme_bw()

print(p3)

ggsave(
  "figures/fig3_rho_effect.png",
  p3,
  width = 12,
  height = 8,
  dpi = 300
)

44 Boxplot Berdasarkan Ukuran Sampel

p4 <- ggplot(
  error_long_plot,
  aes(
    x = factor(n),
    y = Error,
    fill = factor(n)
  )
) +
  geom_boxplot() +
  facet_grid(
    Parameter ~ method,
    scales = "free_y"
  ) +
  labs(
    title = "Pengaruh Ukuran Sampel",
    x = "Ukuran Sampel",
    y = "Error",
    fill = "n"
  ) +
  theme_bw()

print(p4)

ggsave(
  "figures/fig4_sample_size_effect.png",
  p4,
  width = 12,
  height = 8,
  dpi = 300
)