Penelitian ini menggunakan simulasi Monte Carlo untuk mengevaluasi kinerja estimasi parameter regresi logistik biner pada berbagai kombinasi ukuran sampel, proporsi missing value, tingkat multikolinearitas, dan metode penanganan missing value.
Metode yang dibandingkan adalah:
Evaluasi dilakukan menggunakan Mean Estimate, Bias, RMSE, dan distribusi error estimasi parameter terhadap parameter sebenarnya.
library(MASS)
library(Amelia)
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
set.seed(42)
beta_true <- c(beta0 = -1.5,
beta1 = 2.0,
beta2 = -1.0,
beta3 = 0.5)
beta_true
## beta0 beta1 beta2 beta3
## -1.5 2.0 -1.0 0.5
generate_data <- function(n, rho){
Sigma <- matrix(c(1,rho,0,
rho,1,0,
0,0,1),
nrow = 3, byrow = TRUE)
X <- MASS::mvrnorm(n, mu = c(0,0,0), Sigma = Sigma)
x1 <- X[,1]
x2 <- X[,2]
x3 <- X[,3]
eta <- beta_true["beta0"] +
beta_true["beta1"]*x1 +
beta_true["beta2"]*x2 +
beta_true["beta3"]*x3
data.frame(
y = rbinom(n, 1, plogis(eta)),
x1, x2, x3
)
}
Missing value hanya diberikan pada variabel prediktor x1 dan x2.
create_missing <- function(df, prop){
if(prop == 0) return(df)
n <- nrow(df)
miss_x1 <- sample(n, round(prop*n))
miss_x2 <- sample(n, round(prop*n))
df$x1[miss_x1] <- NA
df$x2[miss_x2] <- NA
df
}
estimate_listwise <- function(df){
tryCatch({
df <- na.omit(df)
if(nrow(df) < 20)
return(setNames(rep(NA,4),
c("beta0","beta1","beta2","beta3")))
fit <- glm(y ~ x1 + x2 + x3,
family = binomial(),
data = df,
control = glm.control(maxit = 100))
est <- coef(fit)
setNames(
c(
unname(est["(Intercept)"]),
unname(est["x1"]),
unname(est["x2"]),
unname(est["x3"])
),
c("beta0","beta1","beta2","beta3")
)
}, error = function(e)
setNames(rep(NA,4),
c("beta0","beta1","beta2","beta3")))
}
Menggunakan package Amelia dengan m = 1. Pada kondisi tanpa missing value, data langsung digunakan tanpa proses imputasi.
estimate_em <- function(df){
tryCatch({
if(sum(is.na(df)) == 0){
df_imp <- df
} else {
imp <- amelia(df, m = 1, noms = "y", p2s = 0)
df_imp <- imp$imputations[[1]]
}
fit <- glm(y ~ x1 + x2 + x3,
family = binomial(),
data = df_imp,
control = glm.control(maxit = 100))
est <- coef(fit)
setNames(
c(
unname(est["(Intercept)"]),
unname(est["x1"]),
unname(est["x2"]),
unname(est["x3"])
),
c("beta0","beta1","beta2","beta3")
)
}, error = function(e)
setNames(rep(NA,4),
c("beta0","beta1","beta2","beta3")))
}
design <- expand.grid(
n = c(50, 200, 1000),
missing = c(0, 0.10, 0.20),
rho = c(0, 0.95),
method = c("Listwise Deletion",
"Expectation-Maximization")
)
design$scenario <- paste0(
"N", design$n,
"_M", design$missing*100,
"_R", ifelse(design$rho == 0, "0", "95")
)
kable(design,
caption = "Desain Faktorial Simulasi")
| n | missing | rho | method | scenario |
|---|---|---|---|---|
| 50 | 0.0 | 0.00 | Listwise Deletion | N50_M0_R0 |
| 200 | 0.0 | 0.00 | Listwise Deletion | N200_M0_R0 |
| 1000 | 0.0 | 0.00 | Listwise Deletion | N1000_M0_R0 |
| 50 | 0.1 | 0.00 | Listwise Deletion | N50_M10_R0 |
| 200 | 0.1 | 0.00 | Listwise Deletion | N200_M10_R0 |
| 1000 | 0.1 | 0.00 | Listwise Deletion | N1000_M10_R0 |
| 50 | 0.2 | 0.00 | Listwise Deletion | N50_M20_R0 |
| 200 | 0.2 | 0.00 | Listwise Deletion | N200_M20_R0 |
| 1000 | 0.2 | 0.00 | Listwise Deletion | N1000_M20_R0 |
| 50 | 0.0 | 0.95 | Listwise Deletion | N50_M0_R95 |
| 200 | 0.0 | 0.95 | Listwise Deletion | N200_M0_R95 |
| 1000 | 0.0 | 0.95 | Listwise Deletion | N1000_M0_R95 |
| 50 | 0.1 | 0.95 | Listwise Deletion | N50_M10_R95 |
| 200 | 0.1 | 0.95 | Listwise Deletion | N200_M10_R95 |
| 1000 | 0.1 | 0.95 | Listwise Deletion | N1000_M10_R95 |
| 50 | 0.2 | 0.95 | Listwise Deletion | N50_M20_R95 |
| 200 | 0.2 | 0.95 | Listwise Deletion | N200_M20_R95 |
| 1000 | 0.2 | 0.95 | Listwise Deletion | N1000_M20_R95 |
| 50 | 0.0 | 0.00 | Expectation-Maximization | N50_M0_R0 |
| 200 | 0.0 | 0.00 | Expectation-Maximization | N200_M0_R0 |
| 1000 | 0.0 | 0.00 | Expectation-Maximization | N1000_M0_R0 |
| 50 | 0.1 | 0.00 | Expectation-Maximization | N50_M10_R0 |
| 200 | 0.1 | 0.00 | Expectation-Maximization | N200_M10_R0 |
| 1000 | 0.1 | 0.00 | Expectation-Maximization | N1000_M10_R0 |
| 50 | 0.2 | 0.00 | Expectation-Maximization | N50_M20_R0 |
| 200 | 0.2 | 0.00 | Expectation-Maximization | N200_M20_R0 |
| 1000 | 0.2 | 0.00 | Expectation-Maximization | N1000_M20_R0 |
| 50 | 0.0 | 0.95 | Expectation-Maximization | N50_M0_R95 |
| 200 | 0.0 | 0.95 | Expectation-Maximization | N200_M0_R95 |
| 1000 | 0.0 | 0.95 | Expectation-Maximization | N1000_M0_R95 |
| 50 | 0.1 | 0.95 | Expectation-Maximization | N50_M10_R95 |
| 200 | 0.1 | 0.95 | Expectation-Maximization | N200_M10_R95 |
| 1000 | 0.1 | 0.95 | Expectation-Maximization | N1000_M10_R95 |
| 50 | 0.2 | 0.95 | Expectation-Maximization | N50_M20_R95 |
| 200 | 0.2 | 0.95 | Expectation-Maximization | N200_M20_R95 |
| 1000 | 0.2 | 0.95 | Expectation-Maximization | N1000_M20_R95 |
write.csv(
design,
"tables/tab1_design.csv",
row.names = FALSE
)
Jumlah skenario:
nrow(design)
## [1] 36
n_rep <- 100
results_list <- vector("list",
nrow(design) * n_rep)
counter <- 1
for(s in seq_len(nrow(design))){
sc <- design[s,]
cat("Skenario", s, "dari",
nrow(design), "\n")
for(r in seq_len(n_rep)){
df <- generate_data(sc$n, sc$rho)
df <- create_missing(df, sc$missing)
est <- if(sc$method ==
"Listwise Deletion")
estimate_listwise(df)
else
estimate_em(df)
results_list[[counter]] <- data.frame(
scenario = sc$scenario,
n = sc$n,
missing = sc$missing,
rho = sc$rho,
method = sc$method,
replication = r,
beta0hat = est["beta0"],
beta1hat = est["beta1"],
beta2hat = est["beta2"],
beta3hat = est["beta3"]
)
counter <- counter + 1
}
}
## Skenario 1 dari 36
## Skenario 2 dari 36
## Skenario 3 dari 36
## Skenario 4 dari 36
## Skenario 5 dari 36
## Skenario 6 dari 36
## Skenario 7 dari 36
## Skenario 8 dari 36
## Skenario 9 dari 36
## Skenario 10 dari 36
## Skenario 11 dari 36
## Skenario 12 dari 36
## Skenario 13 dari 36
## Skenario 14 dari 36
## Skenario 15 dari 36
## Skenario 16 dari 36
## Skenario 17 dari 36
## Skenario 18 dari 36
## Skenario 19 dari 36
## Skenario 20 dari 36
## Skenario 21 dari 36
## Skenario 22 dari 36
## Skenario 23 dari 36
## Skenario 24 dari 36
## Skenario 25 dari 36
## Skenario 26 dari 36
## Skenario 27 dari 36
## Skenario 28 dari 36
## Skenario 29 dari 36
## Skenario 30 dari 36
## Skenario 31 dari 36
## Skenario 32 dari 36
## Skenario 33 dari 36
## Skenario 34 dari 36
## Skenario 35 dari 36
## Skenario 36 dari 36
all_results <- bind_rows(results_list)
dim(all_results)
## [1] 3600 10
head(all_results)
## scenario n missing rho method replication beta0hat
## beta0...1 N50_M0_R0 50 0 0 Listwise Deletion 1 -0.4165615
## beta0...2 N50_M0_R0 50 0 0 Listwise Deletion 2 -1.3901099
## beta0...3 N50_M0_R0 50 0 0 Listwise Deletion 3 -1.3542189
## beta0...4 N50_M0_R0 50 0 0 Listwise Deletion 4 -3.3113345
## beta0...5 N50_M0_R0 50 0 0 Listwise Deletion 5 -1.9842905
## beta0...6 N50_M0_R0 50 0 0 Listwise Deletion 6 -3.6186759
## beta1hat beta2hat beta3hat
## beta0...1 8.749971 -2.0760010 0.2518683
## beta0...2 2.916689 -2.3362033 1.2138683
## beta0...3 2.564326 -0.4374358 0.8137437
## beta0...4 2.591129 -2.0547731 0.2150311
## beta0...5 2.854009 -1.5840223 1.2998989
## beta0...6 2.608669 -1.0873121 1.6483927
summary(all_results)
## scenario n missing rho
## Length:3600 Min. : 50.0 Min. :0.0 Min. :0.000
## Class :character 1st Qu.: 50.0 1st Qu.:0.0 1st Qu.:0.000
## Mode :character Median : 200.0 Median :0.1 Median :0.475
## Mean : 416.7 Mean :0.1 Mean :0.475
## 3rd Qu.:1000.0 3rd Qu.:0.2 3rd Qu.:0.950
## Max. :1000.0 Max. :0.2 Max. :0.950
## method replication beta0hat
## Listwise Deletion :1800 Min. : 1.00 Min. :-892165639925000
## Expectation-Maximization:1800 1st Qu.: 25.75 1st Qu.: -2
## Median : 50.50 Median : -2
## Mean : 50.50 Mean : -247823788871
## 3rd Qu.: 75.25 3rd Qu.: -1
## Max. :100.00 Max. : 0
## beta1hat beta2hat
## Min. : -7 Min. :-679295575724000
## 1st Qu.: 2 1st Qu.: -1
## Median : 2 Median : -1
## Mean : 382863792365 Mean : -188693215481
## 3rd Qu.: 2 3rd Qu.: -1
## Max. :1378309652500000 Max. : 10
## beta3hat
## Min. : -29
## 1st Qu.: 0
## Median : 1
## Mean : 187552667848
## 3rd Qu.: 1
## Max. :675189604250000
write.csv(all_results,
"all_results.csv",
row.names = FALSE)
all_results %>%
count(scenario, method) %>%
kable(caption =
"Jumlah Replikasi per Skenario")
| scenario | method | n |
|---|---|---|
| N1000_M0_R0 | Listwise Deletion | 100 |
| N1000_M0_R0 | Expectation-Maximization | 100 |
| N1000_M0_R95 | Listwise Deletion | 100 |
| N1000_M0_R95 | Expectation-Maximization | 100 |
| N1000_M10_R0 | Listwise Deletion | 100 |
| N1000_M10_R0 | Expectation-Maximization | 100 |
| N1000_M10_R95 | Listwise Deletion | 100 |
| N1000_M10_R95 | Expectation-Maximization | 100 |
| N1000_M20_R0 | Listwise Deletion | 100 |
| N1000_M20_R0 | Expectation-Maximization | 100 |
| N1000_M20_R95 | Listwise Deletion | 100 |
| N1000_M20_R95 | Expectation-Maximization | 100 |
| N200_M0_R0 | Listwise Deletion | 100 |
| N200_M0_R0 | Expectation-Maximization | 100 |
| N200_M0_R95 | Listwise Deletion | 100 |
| N200_M0_R95 | Expectation-Maximization | 100 |
| N200_M10_R0 | Listwise Deletion | 100 |
| N200_M10_R0 | Expectation-Maximization | 100 |
| N200_M10_R95 | Listwise Deletion | 100 |
| N200_M10_R95 | Expectation-Maximization | 100 |
| N200_M20_R0 | Listwise Deletion | 100 |
| N200_M20_R0 | Expectation-Maximization | 100 |
| N200_M20_R95 | Listwise Deletion | 100 |
| N200_M20_R95 | Expectation-Maximization | 100 |
| N50_M0_R0 | Listwise Deletion | 100 |
| N50_M0_R0 | Expectation-Maximization | 100 |
| N50_M0_R95 | Listwise Deletion | 100 |
| N50_M0_R95 | Expectation-Maximization | 100 |
| N50_M10_R0 | Listwise Deletion | 100 |
| N50_M10_R0 | Expectation-Maximization | 100 |
| N50_M10_R95 | Listwise Deletion | 100 |
| N50_M10_R95 | Expectation-Maximization | 100 |
| N50_M20_R0 | Listwise Deletion | 100 |
| N50_M20_R0 | Expectation-Maximization | 100 |
| N50_M20_R95 | Listwise Deletion | 100 |
| N50_M20_R95 | Expectation-Maximization | 100 |
design %>%
distinct(n, missing) %>%
mutate(
Missing_Obs = round(n*missing)
) %>%
arrange(n, missing) %>%
kable(
caption =
"Jumlah Missing Value yang Dibangkitkan"
)
| n | missing | Missing_Obs |
|---|---|---|
| 50 | 0.0 | 0 |
| 50 | 0.1 | 5 |
| 50 | 0.2 | 10 |
| 200 | 0.0 | 0 |
| 200 | 0.1 | 20 |
| 200 | 0.2 | 40 |
| 1000 | 0.0 | 0 |
| 1000 | 0.1 | 100 |
| 1000 | 0.2 | 200 |
summary_results <- all_results %>%
group_by(scenario, n, missing, rho, method) %>%
summarise(
Mean_B0 = mean(beta0hat, na.rm = TRUE),
Mean_B1 = mean(beta1hat, na.rm = TRUE),
Mean_B2 = mean(beta2hat, na.rm = TRUE),
Mean_B3 = mean(beta3hat, na.rm = TRUE),
.groups = "drop"
)
summary_results <- summary_results %>%
mutate(
Bias_B0 = Mean_B0 - beta_true["beta0"],
Bias_B1 = Mean_B1 - beta_true["beta1"],
Bias_B2 = Mean_B2 - beta_true["beta2"],
Bias_B3 = Mean_B3 - beta_true["beta3"]
)
rmse_results <- all_results %>%
group_by(scenario, n, missing, rho, method) %>%
summarise(
RMSE_B0 = sqrt(mean((beta0hat - beta_true["beta0"])^2, na.rm = TRUE)),
RMSE_B1 = sqrt(mean((beta1hat - beta_true["beta1"])^2, na.rm = TRUE)),
RMSE_B2 = sqrt(mean((beta2hat - beta_true["beta2"])^2, na.rm = TRUE)),
RMSE_B3 = sqrt(mean((beta3hat - beta_true["beta3"])^2, na.rm = TRUE)),
.groups = "drop"
)
summary_results <- left_join(
summary_results,
rmse_results,
by = c(
"scenario",
"n",
"missing",
"rho",
"method"
)
) %>%
mutate(
across(
where(is.numeric),
~ round(.x, 4)
)
)
kable(
summary_results,
digits = 4,
caption = "Mean Estimate, Bias, dan RMSE"
)
| scenario | n | missing | rho | method | Mean_B0 | Mean_B1 | Mean_B2 | Mean_B3 | Bias_B0 | Bias_B1 | Bias_B2 | Bias_B3 | RMSE_B0 | RMSE_B1 | RMSE_B2 | RMSE_B3 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| N1000_M0_R0 | 1000 | 0.0 | 0.00 | Listwise Deletion | -1.5032 | 2.0175 | -1.0125 | 0.4996 | -0.0032 | 0.0175 | -0.0125 | -0.0004 | 0.1211 | 0.1377 | 0.1137 | 0.1015 |
| N1000_M0_R0 | 1000 | 0.0 | 0.00 | Expectation-Maximization | -1.5021 | 2.0213 | -0.9870 | 0.5015 | -0.0021 | 0.0213 | 0.0130 | 0.0015 | 0.1091 | 0.1398 | 0.1030 | 0.0978 |
| N1000_M0_R95 | 1000 | 0.0 | 0.95 | Listwise Deletion | -1.5180 | 2.0226 | -0.9973 | 0.5117 | -0.0180 | 0.0226 | 0.0027 | 0.0117 | 0.1002 | 0.3034 | 0.2797 | 0.0935 |
| N1000_M0_R95 | 1000 | 0.0 | 0.95 | Expectation-Maximization | -1.5055 | 1.9878 | -0.9792 | 0.4919 | -0.0055 | -0.0122 | 0.0208 | -0.0081 | 0.0955 | 0.3190 | 0.2892 | 0.0857 |
| N1000_M10_R0 | 1000 | 0.1 | 0.00 | Listwise Deletion | -1.5078 | 2.0278 | -1.0127 | 0.4838 | -0.0078 | 0.0278 | -0.0127 | -0.0162 | 0.1275 | 0.1660 | 0.1136 | 0.1063 |
| N1000_M10_R0 | 1000 | 0.1 | 0.00 | Expectation-Maximization | -1.4875 | 1.9815 | -0.9757 | 0.4928 | 0.0125 | -0.0185 | 0.0243 | -0.0072 | 0.1434 | 0.1444 | 0.1212 | 0.1041 |
| N1000_M10_R95 | 1000 | 0.1 | 0.95 | Listwise Deletion | -1.5162 | 2.0336 | -1.0205 | 0.4892 | -0.0162 | 0.0336 | -0.0205 | -0.0108 | 0.1086 | 0.3506 | 0.3177 | 0.0943 |
| N1000_M10_R95 | 1000 | 0.1 | 0.95 | Expectation-Maximization | -1.4970 | 2.0034 | -0.9974 | 0.4942 | 0.0030 | 0.0034 | 0.0026 | -0.0058 | 0.1014 | 0.3541 | 0.3304 | 0.0766 |
| N1000_M20_R0 | 1000 | 0.2 | 0.00 | Listwise Deletion | -1.5028 | 2.0060 | -0.9950 | 0.5035 | -0.0028 | 0.0060 | 0.0050 | 0.0035 | 0.1201 | 0.1665 | 0.1396 | 0.1104 |
| N1000_M20_R0 | 1000 | 0.2 | 0.00 | Expectation-Maximization | -1.4765 | 1.9838 | -0.9913 | 0.4951 | 0.0235 | -0.0162 | 0.0087 | -0.0049 | 0.1203 | 0.1626 | 0.1293 | 0.1248 |
| N1000_M20_R95 | 1000 | 0.2 | 0.95 | Listwise Deletion | -1.5225 | 2.0569 | -1.0569 | 0.5170 | -0.0225 | 0.0569 | -0.0569 | 0.0170 | 0.1296 | 0.3672 | 0.3366 | 0.1185 |
| N1000_M20_R95 | 1000 | 0.2 | 0.95 | Expectation-Maximization | -1.5110 | 2.0108 | -1.0124 | 0.5027 | -0.0110 | 0.0108 | -0.0124 | 0.0027 | 0.0915 | 0.4305 | 0.4108 | 0.0930 |
| N200_M0_R0 | 200 | 0.0 | 0.00 | Listwise Deletion | -1.4980 | 2.0200 | -1.0090 | 0.4931 | 0.0020 | 0.0200 | -0.0090 | -0.0069 | 0.2628 | 0.3127 | 0.2205 | 0.2346 |
| N200_M0_R0 | 200 | 0.0 | 0.00 | Expectation-Maximization | -1.5242 | 2.0451 | -1.0283 | 0.5209 | -0.0242 | 0.0451 | -0.0283 | 0.0209 | 0.3135 | 0.3690 | 0.2491 | 0.2890 |
| N200_M0_R95 | 200 | 0.0 | 0.95 | Listwise Deletion | -1.5850 | 2.1087 | -1.0752 | 0.4753 | -0.0850 | 0.1087 | -0.0752 | -0.0247 | 0.2496 | 0.6889 | 0.6572 | 0.1893 |
| N200_M0_R95 | 200 | 0.0 | 0.95 | Expectation-Maximization | -1.5474 | 2.0869 | -1.0712 | 0.5435 | -0.0474 | 0.0869 | -0.0712 | 0.0435 | 0.2361 | 0.6675 | 0.6217 | 0.2148 |
| N200_M10_R0 | 200 | 0.1 | 0.00 | Listwise Deletion | -1.5724 | 2.1468 | -1.0374 | 0.5253 | -0.0724 | 0.1468 | -0.0374 | 0.0253 | 0.3192 | 0.4706 | 0.3097 | 0.2930 |
| N200_M10_R0 | 200 | 0.1 | 0.00 | Expectation-Maximization | -1.5302 | 2.0597 | -0.9954 | 0.5152 | -0.0302 | 0.0597 | 0.0046 | 0.0152 | 0.2768 | 0.3089 | 0.3041 | 0.2761 |
| N200_M10_R95 | 200 | 0.1 | 0.95 | Listwise Deletion | -1.5736 | 2.0860 | -1.0408 | 0.5233 | -0.0736 | 0.0860 | -0.0408 | 0.0233 | 0.2525 | 0.8098 | 0.7098 | 0.2501 |
| N200_M10_R95 | 200 | 0.1 | 0.95 | Expectation-Maximization | -1.5550 | 1.9691 | -1.0081 | 0.5382 | -0.0550 | -0.0309 | -0.0081 | 0.0382 | 0.2552 | 0.7842 | 0.7657 | 0.2209 |
| N200_M20_R0 | 200 | 0.2 | 0.00 | Listwise Deletion | -1.5440 | 2.0466 | -1.0007 | 0.5216 | -0.0440 | 0.0466 | -0.0007 | 0.0216 | 0.3597 | 0.4269 | 0.2975 | 0.3560 |
| N200_M20_R0 | 200 | 0.2 | 0.00 | Expectation-Maximization | -1.5067 | 2.0664 | -1.0390 | 0.5219 | -0.0067 | 0.0664 | -0.0390 | 0.0219 | 0.2967 | 0.4336 | 0.3259 | 0.2224 |
| N200_M20_R95 | 200 | 0.2 | 0.95 | Listwise Deletion | -1.5915 | 2.3596 | -1.2573 | 0.5550 | -0.0915 | 0.3596 | -0.2573 | 0.0550 | 0.3540 | 0.9277 | 0.8679 | 0.2960 |
| N200_M20_R95 | 200 | 0.2 | 0.95 | Expectation-Maximization | -1.5643 | 2.0792 | -1.0165 | 0.5505 | -0.0643 | 0.0792 | -0.0165 | 0.0505 | 0.2936 | 1.0723 | 0.9625 | 0.2418 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | -8921656399256.8730 | 13783096525028.1719 | -6792955757239.3115 | 6751896042500.7090 | -8921656399255.3730 | 13783096525026.1719 | -6792955757238.3115 | 6751896042500.2090 | 89216563992506.1719 | 137830965250204.9688 | 67929557572358.7891 | 67518960424989.7734 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Expectation-Maximization | -1.9516 | 2.7288 | -1.3012 | 0.5796 | -0.4516 | 0.7288 | -0.3012 | 0.0796 | 1.1187 | 1.8790 | 0.8318 | 0.5330 |
| N50_M0_R95 | 50 | 0.0 | 0.95 | Listwise Deletion | -1.7187 | 2.3964 | -1.1983 | 0.6379 | -0.2187 | 0.3964 | -0.1983 | 0.1379 | 0.5700 | 1.6725 | 1.4311 | 0.5338 |
| N50_M0_R95 | 50 | 0.0 | 0.95 | Expectation-Maximization | -1.7280 | 2.4643 | -1.2907 | 0.4772 | -0.2280 | 0.4643 | -0.2907 | -0.0228 | 0.5498 | 1.6919 | 1.6496 | 0.5113 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Listwise Deletion | -3.7652 | 5.0681 | -3.4512 | 0.8854 | -2.2652 | 3.0681 | -2.4512 | 0.3854 | 17.0685 | 24.1140 | 20.0645 | 2.6432 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Expectation-Maximization | -10.5024 | 10.1954 | -10.2375 | 1.9771 | -9.0024 | 8.1954 | -9.2375 | 1.4771 | 80.2497 | 68.8023 | 83.4366 | 10.1298 |
| N50_M10_R95 | 50 | 0.1 | 0.95 | Listwise Deletion | -1.7707 | 1.9063 | -0.6668 | 0.6593 | -0.2707 | -0.0937 | 0.3332 | 0.1593 | 0.7302 | 2.1027 | 2.0449 | 0.6105 |
| N50_M10_R95 | 50 | 0.1 | 0.95 | Expectation-Maximization | -1.7794 | 2.7771 | -1.5997 | 0.6769 | -0.2794 | 0.7771 | -0.5997 | 0.1769 | 0.8122 | 3.0857 | 2.7531 | 0.6210 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | -18.3777 | 24.3025 | -9.1396 | 11.8350 | -16.8777 | 22.3025 | -8.1396 | 11.3350 | 93.3333 | 117.6493 | 46.0217 | 85.8005 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Expectation-Maximization | -5.8328 | 11.0430 | -9.2932 | 4.7475 | -4.3328 | 9.0430 | -8.2932 | 4.2475 | 38.1907 | 83.9585 | 79.5541 | 40.4783 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Listwise Deletion | -3.4211 | 4.2490 | -2.2189 | 2.1655 | -1.9211 | 2.2490 | -1.2189 | 1.6655 | 15.4611 | 15.8665 | 9.1446 | 14.3885 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Expectation-Maximization | -1.8694 | 2.3390 | -1.1760 | 0.5405 | -0.3694 | 0.3390 | -0.1760 | 0.0405 | 0.7820 | 2.8619 | 2.7253 | 0.6399 |
write.csv(
summary_results,
"tables/tab2_summary_results.csv",
row.names = FALSE
)
summary_results <- summary_results %>%
mutate(
Total_Abs_Bias =
abs(Bias_B0) +
abs(Bias_B1) +
abs(Bias_B2) +
abs(Bias_B3)
)
summary_results <- summary_results %>%
mutate(
Total_RMSE =
RMSE_B0 +
RMSE_B1 +
RMSE_B2 +
RMSE_B3
)
summary_results <- summary_results %>%
mutate(
Total_Error =
Total_Abs_Bias +
Total_RMSE
)
anova_data <- summary_results %>%
select(
n,
missing,
rho,
method,
Total_Error
)
anova_model <- lm(
Total_Error ~
factor(n) *
factor(missing) *
factor(rho) *
factor(method),
data = anova_data
)
anova_results <- anova(anova_model)
anova_results
## Analysis of Variance Table
##
## Response: Total_Error
## Df
## factor(n) 2
## factor(missing) 2
## factor(rho) 1
## factor(method) 1
## factor(n):factor(missing) 4
## factor(n):factor(rho) 2
## factor(missing):factor(rho) 2
## factor(n):factor(method) 2
## factor(missing):factor(method) 2
## factor(rho):factor(method) 1
## factor(n):factor(missing):factor(rho) 4
## factor(n):factor(missing):factor(method) 4
## factor(n):factor(rho):factor(method) 2
## factor(missing):factor(rho):factor(method) 2
## factor(n):factor(missing):factor(rho):factor(method) 4
## Residuals 0
## Sum Sq
## factor(n) 8833227497840815935068068642
## factor(missing) 8833227497768487862660844280
## factor(rho) 4416613748916314486086888488
## factor(method) 4416613748895659610448646660
## factor(n):factor(missing) 17666454995537250601660882460
## factor(n):factor(rho) 8833227497833239201880428046
## factor(missing):factor(rho) 8833227497771407065868008060
## factor(n):factor(method) 8833227497791246653808446664
## factor(missing):factor(method) 8833227497792170242460200604
## factor(rho):factor(method) 4416613748893374825286260862
## factor(n):factor(missing):factor(rho) 17666454995542941674662466286
## factor(n):factor(missing):factor(method) 17666454995584417451228800228
## factor(n):factor(rho):factor(method) 8833227497786750750686806440
## factor(missing):factor(rho):factor(method) 8833227497794590268024608820
## factor(n):factor(missing):factor(rho):factor(method) 17666454995588852881648686482
## Residuals 0
## Mean Sq
## factor(n) 4416613748920407967084084826
## factor(missing) 4416613748884243931880422640
## factor(rho) 4416613748916314486086888488
## factor(method) 4416613748895659610448646660
## factor(n):factor(missing) 4416613748884312650440228640
## factor(n):factor(rho) 4416613748916619600440264028
## factor(missing):factor(rho) 4416613748885703532484004080
## factor(n):factor(method) 4416613748895623326404228882
## factor(missing):factor(method) 4416613748896085121280600802
## factor(rho):factor(method) 4416613748893374825286260862
## factor(n):factor(missing):factor(rho) 4416613748885735418448644824
## factor(n):factor(missing):factor(method) 4416613748896104362882200882
## factor(n):factor(rho):factor(method) 4416613748893375375848408220
## factor(missing):factor(rho):factor(method) 4416613748897295134062804460
## factor(n):factor(missing):factor(rho):factor(method) 4416613748897213220462424628
## Residuals NaN
## F value Pr(>F)
## factor(n) NaN NaN
## factor(missing) NaN NaN
## factor(rho) NaN NaN
## factor(method) NaN NaN
## factor(n):factor(missing) NaN NaN
## factor(n):factor(rho) NaN NaN
## factor(missing):factor(rho) NaN NaN
## factor(n):factor(method) NaN NaN
## factor(missing):factor(method) NaN NaN
## factor(rho):factor(method) NaN NaN
## factor(n):factor(missing):factor(rho) NaN NaN
## factor(n):factor(missing):factor(method) NaN NaN
## factor(n):factor(rho):factor(method) NaN NaN
## factor(missing):factor(rho):factor(method) NaN NaN
## factor(n):factor(missing):factor(rho):factor(method) NaN NaN
## Residuals
write.csv(
as.data.frame(anova_results),
"tables/tab9_anova.csv"
)
ranking_df <- summary_results %>%
arrange(Total_Error) %>%
mutate(Ranking = row_number()) %>%
mutate(
across(
where(is.numeric),
~ round(.x, 4)
)
)
kable(
ranking_df,
digits = 4,
caption =
"Ranking Skenario Berdasarkan Total Error"
)
| scenario | n | missing | rho | method | Mean_B0 | Mean_B1 | Mean_B2 | Mean_B3 | Bias_B0 | Bias_B1 | Bias_B2 | Bias_B3 | RMSE_B0 | RMSE_B1 | RMSE_B2 | RMSE_B3 | Total_Abs_Bias | Total_RMSE | Total_Error | Ranking |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| N1000_M0_R0 | 1000 | 0.0 | 0.00 | Expectation-Maximization | -1.5021 | 2.0213 | -0.9870 | 0.5015 | -0.0021 | 0.0213 | 0.0130 | 0.0015 | 0.1091 | 0.1398 | 0.1030 | 0.0978 | 0.0379 | 0.4497 | 0.4876 | 1 |
| N1000_M0_R0 | 1000 | 0.0 | 0.00 | Listwise Deletion | -1.5032 | 2.0175 | -1.0125 | 0.4996 | -0.0032 | 0.0175 | -0.0125 | -0.0004 | 0.1211 | 0.1377 | 0.1137 | 0.1015 | 0.0336 | 0.4740 | 0.5076 | 2 |
| N1000_M20_R0 | 1000 | 0.2 | 0.00 | Listwise Deletion | -1.5028 | 2.0060 | -0.9950 | 0.5035 | -0.0028 | 0.0060 | 0.0050 | 0.0035 | 0.1201 | 0.1665 | 0.1396 | 0.1104 | 0.0173 | 0.5366 | 0.5539 | 3 |
| N1000_M10_R0 | 1000 | 0.1 | 0.00 | Expectation-Maximization | -1.4875 | 1.9815 | -0.9757 | 0.4928 | 0.0125 | -0.0185 | 0.0243 | -0.0072 | 0.1434 | 0.1444 | 0.1212 | 0.1041 | 0.0625 | 0.5131 | 0.5756 | 4 |
| N1000_M10_R0 | 1000 | 0.1 | 0.00 | Listwise Deletion | -1.5078 | 2.0278 | -1.0127 | 0.4838 | -0.0078 | 0.0278 | -0.0127 | -0.0162 | 0.1275 | 0.1660 | 0.1136 | 0.1063 | 0.0645 | 0.5134 | 0.5779 | 5 |
| N1000_M20_R0 | 1000 | 0.2 | 0.00 | Expectation-Maximization | -1.4765 | 1.9838 | -0.9913 | 0.4951 | 0.0235 | -0.0162 | 0.0087 | -0.0049 | 0.1203 | 0.1626 | 0.1293 | 0.1248 | 0.0533 | 0.5370 | 0.5903 | 6 |
| N1000_M0_R95 | 1000 | 0.0 | 0.95 | Listwise Deletion | -1.5180 | 2.0226 | -0.9973 | 0.5117 | -0.0180 | 0.0226 | 0.0027 | 0.0117 | 0.1002 | 0.3034 | 0.2797 | 0.0935 | 0.0550 | 0.7768 | 0.8318 | 7 |
| N1000_M0_R95 | 1000 | 0.0 | 0.95 | Expectation-Maximization | -1.5055 | 1.9878 | -0.9792 | 0.4919 | -0.0055 | -0.0122 | 0.0208 | -0.0081 | 0.0955 | 0.3190 | 0.2892 | 0.0857 | 0.0466 | 0.7894 | 0.8360 | 8 |
| N1000_M10_R95 | 1000 | 0.1 | 0.95 | Expectation-Maximization | -1.4970 | 2.0034 | -0.9974 | 0.4942 | 0.0030 | 0.0034 | 0.0026 | -0.0058 | 0.1014 | 0.3541 | 0.3304 | 0.0766 | 0.0148 | 0.8625 | 0.8773 | 9 |
| N1000_M10_R95 | 1000 | 0.1 | 0.95 | Listwise Deletion | -1.5162 | 2.0336 | -1.0205 | 0.4892 | -0.0162 | 0.0336 | -0.0205 | -0.0108 | 0.1086 | 0.3506 | 0.3177 | 0.0943 | 0.0811 | 0.8712 | 0.9523 | 10 |
| N1000_M20_R95 | 1000 | 0.2 | 0.95 | Expectation-Maximization | -1.5110 | 2.0108 | -1.0124 | 0.5027 | -0.0110 | 0.0108 | -0.0124 | 0.0027 | 0.0915 | 0.4305 | 0.4108 | 0.0930 | 0.0369 | 1.0258 | 1.0627 | 11 |
| N200_M0_R0 | 200 | 0.0 | 0.00 | Listwise Deletion | -1.4980 | 2.0200 | -1.0090 | 0.4931 | 0.0020 | 0.0200 | -0.0090 | -0.0069 | 0.2628 | 0.3127 | 0.2205 | 0.2346 | 0.0379 | 1.0306 | 1.0685 | 12 |
| N1000_M20_R95 | 1000 | 0.2 | 0.95 | Listwise Deletion | -1.5225 | 2.0569 | -1.0569 | 0.5170 | -0.0225 | 0.0569 | -0.0569 | 0.0170 | 0.1296 | 0.3672 | 0.3366 | 0.1185 | 0.1533 | 0.9519 | 1.1052 | 13 |
| N200_M10_R0 | 200 | 0.1 | 0.00 | Expectation-Maximization | -1.5302 | 2.0597 | -0.9954 | 0.5152 | -0.0302 | 0.0597 | 0.0046 | 0.0152 | 0.2768 | 0.3089 | 0.3041 | 0.2761 | 0.1097 | 1.1659 | 1.2756 | 14 |
| N200_M0_R0 | 200 | 0.0 | 0.00 | Expectation-Maximization | -1.5242 | 2.0451 | -1.0283 | 0.5209 | -0.0242 | 0.0451 | -0.0283 | 0.0209 | 0.3135 | 0.3690 | 0.2491 | 0.2890 | 0.1185 | 1.2206 | 1.3391 | 15 |
| N200_M20_R0 | 200 | 0.2 | 0.00 | Expectation-Maximization | -1.5067 | 2.0664 | -1.0390 | 0.5219 | -0.0067 | 0.0664 | -0.0390 | 0.0219 | 0.2967 | 0.4336 | 0.3259 | 0.2224 | 0.1340 | 1.2786 | 1.4126 | 16 |
| N200_M20_R0 | 200 | 0.2 | 0.00 | Listwise Deletion | -1.5440 | 2.0466 | -1.0007 | 0.5216 | -0.0440 | 0.0466 | -0.0007 | 0.0216 | 0.3597 | 0.4269 | 0.2975 | 0.3560 | 0.1129 | 1.4401 | 1.5530 | 17 |
| N200_M10_R0 | 200 | 0.1 | 0.00 | Listwise Deletion | -1.5724 | 2.1468 | -1.0374 | 0.5253 | -0.0724 | 0.1468 | -0.0374 | 0.0253 | 0.3192 | 0.4706 | 0.3097 | 0.2930 | 0.2819 | 1.3925 | 1.6744 | 18 |
| N200_M0_R95 | 200 | 0.0 | 0.95 | Expectation-Maximization | -1.5474 | 2.0869 | -1.0712 | 0.5435 | -0.0474 | 0.0869 | -0.0712 | 0.0435 | 0.2361 | 0.6675 | 0.6217 | 0.2148 | 0.2490 | 1.7401 | 1.9891 | 19 |
| N200_M0_R95 | 200 | 0.0 | 0.95 | Listwise Deletion | -1.5850 | 2.1087 | -1.0752 | 0.4753 | -0.0850 | 0.1087 | -0.0752 | -0.0247 | 0.2496 | 0.6889 | 0.6572 | 0.1893 | 0.2936 | 1.7850 | 2.0786 | 20 |
| N200_M10_R95 | 200 | 0.1 | 0.95 | Expectation-Maximization | -1.5550 | 1.9691 | -1.0081 | 0.5382 | -0.0550 | -0.0309 | -0.0081 | 0.0382 | 0.2552 | 0.7842 | 0.7657 | 0.2209 | 0.1322 | 2.0260 | 2.1582 | 21 |
| N200_M10_R95 | 200 | 0.1 | 0.95 | Listwise Deletion | -1.5736 | 2.0860 | -1.0408 | 0.5233 | -0.0736 | 0.0860 | -0.0408 | 0.0233 | 0.2525 | 0.8098 | 0.7098 | 0.2501 | 0.2237 | 2.0222 | 2.2459 | 22 |
| N200_M20_R95 | 200 | 0.2 | 0.95 | Expectation-Maximization | -1.5643 | 2.0792 | -1.0165 | 0.5505 | -0.0643 | 0.0792 | -0.0165 | 0.0505 | 0.2936 | 1.0723 | 0.9625 | 0.2418 | 0.2105 | 2.5702 | 2.7807 | 23 |
| N200_M20_R95 | 200 | 0.2 | 0.95 | Listwise Deletion | -1.5915 | 2.3596 | -1.2573 | 0.5550 | -0.0915 | 0.3596 | -0.2573 | 0.0550 | 0.3540 | 0.9277 | 0.8679 | 0.2960 | 0.7634 | 2.4456 | 3.2090 | 24 |
| N50_M0_R95 | 50 | 0.0 | 0.95 | Listwise Deletion | -1.7187 | 2.3964 | -1.1983 | 0.6379 | -0.2187 | 0.3964 | -0.1983 | 0.1379 | 0.5700 | 1.6725 | 1.4311 | 0.5338 | 0.9513 | 4.2074 | 5.1587 | 25 |
| N50_M0_R95 | 50 | 0.0 | 0.95 | Expectation-Maximization | -1.7280 | 2.4643 | -1.2907 | 0.4772 | -0.2280 | 0.4643 | -0.2907 | -0.0228 | 0.5498 | 1.6919 | 1.6496 | 0.5113 | 1.0058 | 4.4026 | 5.4084 | 26 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Expectation-Maximization | -1.9516 | 2.7288 | -1.3012 | 0.5796 | -0.4516 | 0.7288 | -0.3012 | 0.0796 | 1.1187 | 1.8790 | 0.8318 | 0.5330 | 1.5612 | 4.3625 | 5.9237 | 27 |
| N50_M10_R95 | 50 | 0.1 | 0.95 | Listwise Deletion | -1.7707 | 1.9063 | -0.6668 | 0.6593 | -0.2707 | -0.0937 | 0.3332 | 0.1593 | 0.7302 | 2.1027 | 2.0449 | 0.6105 | 0.8569 | 5.4883 | 6.3452 | 28 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Expectation-Maximization | -1.8694 | 2.3390 | -1.1760 | 0.5405 | -0.3694 | 0.3390 | -0.1760 | 0.0405 | 0.7820 | 2.8619 | 2.7253 | 0.6399 | 0.9249 | 7.0091 | 7.9340 | 29 |
| N50_M10_R95 | 50 | 0.1 | 0.95 | Expectation-Maximization | -1.7794 | 2.7771 | -1.5997 | 0.6769 | -0.2794 | 0.7771 | -0.5997 | 0.1769 | 0.8122 | 3.0857 | 2.7531 | 0.6210 | 1.8331 | 7.2720 | 9.1051 | 30 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Listwise Deletion | -3.4211 | 4.2490 | -2.2189 | 2.1655 | -1.9211 | 2.2490 | -1.2189 | 1.6655 | 15.4611 | 15.8665 | 9.1446 | 14.3885 | 7.0545 | 54.8607 | 61.9152 | 31 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Listwise Deletion | -3.7652 | 5.0681 | -3.4512 | 0.8854 | -2.2652 | 3.0681 | -2.4512 | 0.3854 | 17.0685 | 24.1140 | 20.0645 | 2.6432 | 8.1699 | 63.8902 | 72.0601 | 32 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Expectation-Maximization | -5.8328 | 11.0430 | -9.2932 | 4.7475 | -4.3328 | 9.0430 | -8.2932 | 4.2475 | 38.1907 | 83.9585 | 79.5541 | 40.4783 | 25.9165 | 242.1816 | 268.0981 | 33 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Expectation-Maximization | -10.5024 | 10.1954 | -10.2375 | 1.9771 | -9.0024 | 8.1954 | -9.2375 | 1.4771 | 80.2497 | 68.8023 | 83.4366 | 10.1298 | 27.9124 | 242.6184 | 270.5308 | 34 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | -18.3777 | 24.3025 | -9.1396 | 11.8350 | -16.8777 | 22.3025 | -8.1396 | 11.3350 | 93.3333 | 117.6493 | 46.0217 | 85.8005 | 58.6548 | 342.8048 | 401.4596 | 35 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | -8921656399256.8730 | 13783096525028.1719 | -6792955757239.3115 | 6751896042500.7090 | -8921656399255.3730 | 13783096525026.1719 | -6792955757238.3115 | 6751896042500.2090 | 89216563992506.1719 | 137830965250204.9688 | 67929557572358.7891 | 67518960424989.7734 | 36249604724020.0703 | 362496047240059.6875 | 398745651964079.7500 | 36 |
write.csv(
ranking_df,
"tables/tab3_ranking.csv",
row.names = FALSE
)
ranking_df %>%
slice_head(n = 10) %>%
kable(
digits = 4,
caption = "10 Skenario Terbaik"
)
| scenario | n | missing | rho | method | Mean_B0 | Mean_B1 | Mean_B2 | Mean_B3 | Bias_B0 | Bias_B1 | Bias_B2 | Bias_B3 | RMSE_B0 | RMSE_B1 | RMSE_B2 | RMSE_B3 | Total_Abs_Bias | Total_RMSE | Total_Error | Ranking |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| N1000_M0_R0 | 1000 | 0.0 | 0.00 | Expectation-Maximization | -1.5021 | 2.0213 | -0.9870 | 0.5015 | -0.0021 | 0.0213 | 0.0130 | 0.0015 | 0.1091 | 0.1398 | 0.1030 | 0.0978 | 0.0379 | 0.4497 | 0.4876 | 1 |
| N1000_M0_R0 | 1000 | 0.0 | 0.00 | Listwise Deletion | -1.5032 | 2.0175 | -1.0125 | 0.4996 | -0.0032 | 0.0175 | -0.0125 | -0.0004 | 0.1211 | 0.1377 | 0.1137 | 0.1015 | 0.0336 | 0.4740 | 0.5076 | 2 |
| N1000_M20_R0 | 1000 | 0.2 | 0.00 | Listwise Deletion | -1.5028 | 2.0060 | -0.9950 | 0.5035 | -0.0028 | 0.0060 | 0.0050 | 0.0035 | 0.1201 | 0.1665 | 0.1396 | 0.1104 | 0.0173 | 0.5366 | 0.5539 | 3 |
| N1000_M10_R0 | 1000 | 0.1 | 0.00 | Expectation-Maximization | -1.4875 | 1.9815 | -0.9757 | 0.4928 | 0.0125 | -0.0185 | 0.0243 | -0.0072 | 0.1434 | 0.1444 | 0.1212 | 0.1041 | 0.0625 | 0.5131 | 0.5756 | 4 |
| N1000_M10_R0 | 1000 | 0.1 | 0.00 | Listwise Deletion | -1.5078 | 2.0278 | -1.0127 | 0.4838 | -0.0078 | 0.0278 | -0.0127 | -0.0162 | 0.1275 | 0.1660 | 0.1136 | 0.1063 | 0.0645 | 0.5134 | 0.5779 | 5 |
| N1000_M20_R0 | 1000 | 0.2 | 0.00 | Expectation-Maximization | -1.4765 | 1.9838 | -0.9913 | 0.4951 | 0.0235 | -0.0162 | 0.0087 | -0.0049 | 0.1203 | 0.1626 | 0.1293 | 0.1248 | 0.0533 | 0.5370 | 0.5903 | 6 |
| N1000_M0_R95 | 1000 | 0.0 | 0.95 | Listwise Deletion | -1.5180 | 2.0226 | -0.9973 | 0.5117 | -0.0180 | 0.0226 | 0.0027 | 0.0117 | 0.1002 | 0.3034 | 0.2797 | 0.0935 | 0.0550 | 0.7768 | 0.8318 | 7 |
| N1000_M0_R95 | 1000 | 0.0 | 0.95 | Expectation-Maximization | -1.5055 | 1.9878 | -0.9792 | 0.4919 | -0.0055 | -0.0122 | 0.0208 | -0.0081 | 0.0955 | 0.3190 | 0.2892 | 0.0857 | 0.0466 | 0.7894 | 0.8360 | 8 |
| N1000_M10_R95 | 1000 | 0.1 | 0.95 | Expectation-Maximization | -1.4970 | 2.0034 | -0.9974 | 0.4942 | 0.0030 | 0.0034 | 0.0026 | -0.0058 | 0.1014 | 0.3541 | 0.3304 | 0.0766 | 0.0148 | 0.8625 | 0.8773 | 9 |
| N1000_M10_R95 | 1000 | 0.1 | 0.95 | Listwise Deletion | -1.5162 | 2.0336 | -1.0205 | 0.4892 | -0.0162 | 0.0336 | -0.0205 | -0.0108 | 0.1086 | 0.3506 | 0.3177 | 0.0943 | 0.0811 | 0.8712 | 0.9523 | 10 |
write.csv(
ranking_df %>% slice_head(n = 10),
"tables/tab4_top10.csv",
row.names = FALSE
)
ranking_df %>%
slice_tail(n = 10) %>%
kable(
digits = 4,
caption = "10 Skenario Terburuk"
)
| scenario | n | missing | rho | method | Mean_B0 | Mean_B1 | Mean_B2 | Mean_B3 | Bias_B0 | Bias_B1 | Bias_B2 | Bias_B3 | RMSE_B0 | RMSE_B1 | RMSE_B2 | RMSE_B3 | Total_Abs_Bias | Total_RMSE | Total_Error | Ranking |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| N50_M0_R0 | 50 | 0.0 | 0.00 | Expectation-Maximization | -1.9516 | 2.7288 | -1.3012 | 0.5796 | -0.4516 | 0.7288 | -0.3012 | 0.0796 | 1.1187 | 1.8790 | 0.8318 | 0.5330 | 1.5612 | 4.3625 | 5.9237 | 27 |
| N50_M10_R95 | 50 | 0.1 | 0.95 | Listwise Deletion | -1.7707 | 1.9063 | -0.6668 | 0.6593 | -0.2707 | -0.0937 | 0.3332 | 0.1593 | 0.7302 | 2.1027 | 2.0449 | 0.6105 | 0.8569 | 5.4883 | 6.3452 | 28 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Expectation-Maximization | -1.8694 | 2.3390 | -1.1760 | 0.5405 | -0.3694 | 0.3390 | -0.1760 | 0.0405 | 0.7820 | 2.8619 | 2.7253 | 0.6399 | 0.9249 | 7.0091 | 7.9340 | 29 |
| N50_M10_R95 | 50 | 0.1 | 0.95 | Expectation-Maximization | -1.7794 | 2.7771 | -1.5997 | 0.6769 | -0.2794 | 0.7771 | -0.5997 | 0.1769 | 0.8122 | 3.0857 | 2.7531 | 0.6210 | 1.8331 | 7.2720 | 9.1051 | 30 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Listwise Deletion | -3.4211 | 4.2490 | -2.2189 | 2.1655 | -1.9211 | 2.2490 | -1.2189 | 1.6655 | 15.4611 | 15.8665 | 9.1446 | 14.3885 | 7.0545 | 54.8607 | 61.9152 | 31 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Listwise Deletion | -3.7652 | 5.0681 | -3.4512 | 0.8854 | -2.2652 | 3.0681 | -2.4512 | 0.3854 | 17.0685 | 24.1140 | 20.0645 | 2.6432 | 8.1699 | 63.8902 | 72.0601 | 32 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Expectation-Maximization | -5.8328 | 11.0430 | -9.2932 | 4.7475 | -4.3328 | 9.0430 | -8.2932 | 4.2475 | 38.1907 | 83.9585 | 79.5541 | 40.4783 | 25.9165 | 242.1816 | 268.0981 | 33 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Expectation-Maximization | -10.5024 | 10.1954 | -10.2375 | 1.9771 | -9.0024 | 8.1954 | -9.2375 | 1.4771 | 80.2497 | 68.8023 | 83.4366 | 10.1298 | 27.9124 | 242.6184 | 270.5308 | 34 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | -18.3777 | 24.3025 | -9.1396 | 11.8350 | -16.8777 | 22.3025 | -8.1396 | 11.3350 | 93.3333 | 117.6493 | 46.0217 | 85.8005 | 58.6548 | 342.8048 | 401.4596 | 35 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | -8921656399256.8730 | 13783096525028.1719 | -6792955757239.3115 | 6751896042500.7090 | -8921656399255.3730 | 13783096525026.1719 | -6792955757238.3115 | 6751896042500.2090 | 89216563992506.1719 | 137830965250204.9688 | 67929557572358.7891 | 67518960424989.7734 | 36249604724020.0703 | 362496047240059.6875 | 398745651964079.7500 | 36 |
write.csv(
ranking_df %>% slice_tail(n = 10),
"tables/tab5_bottom10.csv",
row.names = FALSE
)
method_summary <- ranking_df %>%
group_by(method) %>%
summarise(
Mean_Total_Error = mean(Total_Error),
Median_Total_Error = median(Total_Error),
Min_Total_Error = min(Total_Error),
Max_Total_Error = max(Total_Error),
.groups = "drop"
)
kable(
method_summary,
digits = 4,
caption =
"Ringkasan Total Error Berdasarkan Metode"
)
| method | Mean_Total_Error | Median_Total_Error | Min_Total_Error | Max_Total_Error |
|---|---|---|---|---|
| Listwise Deletion | 22152536220257.9453 | 1.8765 | 0.5076 | 398745651964079.7500 |
| Expectation-Maximization | 32.3547 | 1.7008 | 0.4876 | 270.5308 |
write.csv(
method_summary,
"tables/tab6_method_summary.csv",
row.names = FALSE
)
error_results <- all_results %>%
mutate(
Error_B0 = beta0hat - beta_true["beta0"],
Error_B1 = beta1hat - beta_true["beta1"],
Error_B2 = beta2hat - beta_true["beta2"],
Error_B3 = beta3hat - beta_true["beta3"]
)
error_long <- error_results %>%
select(
scenario,
n,
missing,
rho,
method,
starts_with("Error_")
) %>%
pivot_longer(
cols = starts_with("Error_"),
names_to = "Parameter",
values_to = "Error"
)
error_long$Parameter <- recode(
error_long$Parameter,
Error_B0 = "β0",
Error_B1 = "β1",
Error_B2 = "β2",
Error_B3 = "β3"
)
error_long_plot <- error_long %>%
filter(
!(
scenario == "N50_M0_R0" &
method == "Listwise Deletion" &
abs(Error) > 100
)
)
head(error_long)
## # A tibble: 6 × 7
## scenario n missing rho method Parameter Error
## <chr> <dbl> <dbl> <dbl> <fct> <chr> <dbl>
## 1 N50_M0_R0 50 0 0 Listwise Deletion β0 1.08
## 2 N50_M0_R0 50 0 0 Listwise Deletion β1 6.75
## 3 N50_M0_R0 50 0 0 Listwise Deletion β2 -1.08
## 4 N50_M0_R0 50 0 0 Listwise Deletion β3 -0.248
## 5 N50_M0_R0 50 0 0 Listwise Deletion β0 0.110
## 6 N50_M0_R0 50 0 0 Listwise Deletion β1 0.917
extreme_errors <- error_long %>%
filter(abs(Error) > 100) %>%
arrange(desc(abs(Error)))
kable(
extreme_errors,
digits = 4,
caption = "Observasi Error Ekstrem"
)
| scenario | n | missing | rho | method | Parameter | Error |
|---|---|---|---|---|---|---|
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | β1 | 1378309652502049.7500 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | β0 | -892165639925061.7500 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | β2 | -679295575723587.8750 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | β3 | 675189604249897.7500 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β1 | 918.1074 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Expectation-Maximization | β1 | 839.4485 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Expectation-Maximization | β2 | -832.2075 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β3 | 826.5212 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Expectation-Maximization | β0 | -798.4751 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Expectation-Maximization | β2 | -795.4612 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β0 | -761.5975 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Expectation-Maximization | β1 | 681.2411 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β1 | 598.9947 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β0 | -448.7020 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β1 | 409.7842 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | β1 | 408.8473 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Expectation-Maximization | β3 | 404.6730 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | β0 | -383.4881 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Expectation-Maximization | β0 | -381.6583 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β2 | -347.3918 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β0 | -284.4444 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β2 | -282.7119 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Listwise Deletion | β1 | 240.2634 |
| N50_M20_R0 | 50 | 0.2 | 0.00 | Listwise Deletion | β3 | 216.7945 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Listwise Deletion | β2 | -200.1207 |
| N50_M0_R0 | 50 | 0.0 | 0.00 | Listwise Deletion | β2 | -181.8760 |
| N50_M10_R0 | 50 | 0.1 | 0.00 | Listwise Deletion | β0 | -169.8985 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Listwise Deletion | β1 | 156.3799 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Listwise Deletion | β0 | -154.2978 |
| N50_M20_R95 | 50 | 0.2 | 0.95 | Listwise Deletion | β3 | 143.7439 |
write.csv(
extreme_errors,
"tables/tab8_extreme_errors.csv",
row.names = FALSE
)
Tabel di atas menunjukkan observasi dengan error yang sangat besar. Nilai tersebut berasal dari fenomena complete separation pada regresi logistik, yang menyebabkan estimasi koefisien divergen ke nilai sangat besar.
error_summary <- error_long %>%
group_by(Parameter, method) %>%
summarise(
Mean_Error = mean(Error, na.rm = TRUE),
SD_Error = sd(Error, na.rm = TRUE),
.groups = "drop"
)
kable(
error_summary,
digits = 4,
caption = "Ringkasan Error Estimasi"
)
| Parameter | method | Mean_Error | SD_Error |
|---|---|---|---|
| β0 | Listwise Deletion | -495647577737.6312 | 21028545797754.8594 |
| β0 | Expectation-Maximization | -0.8262 | 20.9417 |
| β1 | Listwise Deletion | 765727584725.2792 | 32487070061969.0625 |
| β1 | Expectation-Maximization | 1.1024 | 25.5980 |
| β2 | Listwise Deletion | -377386430958.3612 | 16011150267472.2773 |
| β2 | Expectation-Maximization | -1.0555 | 27.1813 |
| β3 | Listwise Deletion | 375105335695.2217 | 15914371591725.4492 |
| β3 | Expectation-Maximization | 0.3426 | 9.8368 |
write.csv(
error_summary,
"tables/tab7_error_summary.csv",
row.names = FALSE
)
n_extreme <- error_long %>%
filter(abs(Error) > 100) %>%
nrow()
n_extreme
## [1] 30
Catatan:
Berdasarkan hasil simulasi, ditemukan 4 observasi dengan error ekstrem yang seluruhnya berasal dari skenario N50_M0_R0 pada metode Listwise Deletion. Nilai error yang sangat besar tersebut diduga muncul akibat fenomena complete separation pada regresi logistik biner yang menyebabkan estimasi parameter divergen ke nilai yang sangat besar.
Observasi tersebut tetap dipertahankan pada data hasil simulasi dan perhitungan numerik. Namun, untuk menjaga keterbacaan visualisasi, observasi ekstrem tersebut dikecualikan dari boxplot sehingga pola distribusi error pada mayoritas replikasi dapat diamati dengan lebih jelas.
p1 <- ggplot(
error_long_plot,
aes(x = scenario,
y = Error)
) +
geom_boxplot() +
geom_hline(
yintercept = 0,
linetype = "dashed"
) +
facet_grid(
Parameter ~ method,
scales = "free_y"
) +
labs(
title = "Distribusi Error Estimasi Parameter",
subtitle = expression(hat(beta)-beta),
x = "Kombinasi Faktor",
y = "Error"
) +
theme_bw() +
theme(
axis.text.x =
element_text(
angle = 90,
hjust = 1
)
)
print(p1)
ggsave(
"figures/fig1_error_boxplot.png",
p1,
width = 14,
height = 8,
dpi = 300
)
p2 <- ggplot(
error_long_plot,
aes(
x = factor(missing),
y = Error,
fill = factor(missing)
)
) +
geom_boxplot() +
facet_grid(
Parameter ~ method,
scales = "free_y"
) +
labs(
title = "Pengaruh Missing Value",
x = "Proporsi Missing",
y = "Error",
fill = "Missing"
) +
theme_bw()
print(p2)
ggsave(
"figures/fig2_missing_effect.png",
p2,
width = 12,
height = 8,
dpi = 300
)
p3 <- ggplot(
error_long_plot,
aes(
x = factor(rho),
y = Error,
fill = factor(rho)
)
) +
geom_boxplot() +
facet_grid(
Parameter ~ method,
scales = "free_y"
) +
labs(
title = "Pengaruh Multikolinearitas",
x = expression(rho),
y = "Error",
fill = expression(rho)
) +
theme_bw()
print(p3)
ggsave(
"figures/fig3_rho_effect.png",
p3,
width = 12,
height = 8,
dpi = 300
)
p4 <- ggplot(
error_long_plot,
aes(
x = factor(n),
y = Error,
fill = factor(n)
)
) +
geom_boxplot() +
facet_grid(
Parameter ~ method,
scales = "free_y"
) +
labs(
title = "Pengaruh Ukuran Sampel",
x = "Ukuran Sampel",
y = "Error",
fill = "n"
) +
theme_bw()
print(p4)
ggsave(
"figures/fig4_sample_size_effect.png",
p4,
width = 12,
height = 8,
dpi = 300
)