library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
library(ggplot2)
# -----------------------------
# 1. Persiapan
# -----------------------------
rm(list = ls())
set.seed(42)

# Parameter sebenarnya
beta0 = -1
beta1 = 1
beta2 = 0.8

# Jumlah replikasi
R <- 100
# -----------------------------
# 2. Menentukan Skenario
# -----------------------------
skenario = data.frame(
  nama = c("S1","S2","S3","S4"),
  n = c(50,200,50,200),
  miss = c(0,0,0.10,0.10)
)

print(skenario)
##   nama   n miss
## 1   S1  50  0.0
## 2   S2 200  0.0
## 3   S3  50  0.1
## 4   S4 200  0.1
#Menyiapkan Tempat Hasil
hasil = data.frame()
#----------------------------------------------------
# 3. Simulasi
#----------------------------------------------------
for(i in 1:nrow(skenario))
{
  
  n = skenario$n[i]
  miss = skenario$miss[i]
  nama = skenario$nama[i]
  
  for(r in 1:R)
  {
    
    # bangkitkan variabel bebas
    X1 = rnorm(n,0,1)
    X2 = rnorm(n,0,1)
    
    # hitung probabilitas
    eta = beta0 + beta1*X1 + beta2*X2
    p = exp(eta)/(1+exp(eta))
    
    # bangkitkan respon biner
    Y = rbinom(n,1,p)
    
    data = data.frame(Y,X1,X2)
    # tambahkan missing value pada X1
    if(miss > 0)
    {
      idx = sample(1:n, round(miss*n))
      data$X1[idx] = NA
    }
    
    # estimasi model
    model = glm(Y ~ X1 + X2,
                family = binomial,
                data = data)
    
    b = coef(model)
    
    # simpan hasil
    hasil = rbind(hasil,
                  data.frame(
                    skenario = nama,
                    replikasi = r,
                    beta0_hat = b[1],
                    beta1_hat = b[2],
                    beta2_hat = b[3]
                  ))
  }
}
#Menampilkan Hasil Awal
head(hasil)
##              skenario replikasi  beta0_hat beta1_hat beta2_hat
## (Intercept)        S1         1 -1.9512364 1.3429971 0.5738357
## (Intercept)1       S1         2 -2.2994115 0.2711374 1.5108756
## (Intercept)2       S1         3 -0.5465119 0.7499164 0.7289613
## (Intercept)3       S1         4 -2.5098794 2.8812612 1.7006494
## (Intercept)4       S1         5 -0.7204151 0.5189767 0.6676385
## (Intercept)5       S1         6 -0.8235615 1.3736119 0.9851864
#----------------------------------------------------
# 4. Menghitung Rata-rata Estimasi dan Bias
#----------------------------------------------------
ringkasan = aggregate(cbind(beta0_hat,
                            beta1_hat,
                            beta2_hat)
                      ~ skenario,
                      data = hasil,
                      mean)

ringkasan$bias_beta0 = ringkasan$beta0_hat - beta0
ringkasan$bias_beta1 = ringkasan$beta1_hat - beta1
ringkasan$bias_beta2 = ringkasan$beta2_hat - beta2

print(ringkasan)
##   skenario beta0_hat beta1_hat beta2_hat  bias_beta0 bias_beta1 bias_beta2
## 1       S1 -1.111891  1.088251 0.9136378 -0.11189090 0.08825085 0.11363780
## 2       S2 -1.022236  1.013225 0.8301357 -0.02223618 0.01322526 0.03013568
## 3       S3 -1.138770  1.289175 0.9926623 -0.13877049 0.28917529 0.19266234
## 4       S4 -1.023961  1.010367 0.8253414 -0.02396084 0.01036714 0.02534143
#----------------------------------------------------
# 5. Grafik Bias Beta1
#----------------------------------------------------
barplot(ringkasan$bias_beta1,
        names.arg = ringkasan$skenario,
        col = "lightblue",
        main = "Bias Estimasi Beta1",
        ylab = "Bias")

abline(h = 0, lty = 2)

#----------------------------------------------------
# 6. Menentukan Skenario Terbaik
#----------------------------------------------------
total_bias = abs(ringkasan$bias_beta0) +
             abs(ringkasan$bias_beta1) +
             abs(ringkasan$bias_beta2)

ringkasan$total_bias = total_bias

print(ringkasan)
##   skenario beta0_hat beta1_hat beta2_hat  bias_beta0 bias_beta1 bias_beta2
## 1       S1 -1.111891  1.088251 0.9136378 -0.11189090 0.08825085 0.11363780
## 2       S2 -1.022236  1.013225 0.8301357 -0.02223618 0.01322526 0.03013568
## 3       S3 -1.138770  1.289175 0.9926623 -0.13877049 0.28917529 0.19266234
## 4       S4 -1.023961  1.010367 0.8253414 -0.02396084 0.01036714 0.02534143
##   total_bias
## 1 0.31377956
## 2 0.06559711
## 3 0.62060812
## 4 0.05966942
best = ringkasan[which.min(ringkasan$total_bias), ]
worst = ringkasan[which.max(ringkasan$total_bias), ]

cat("\nSkenario terbaik :", best$skenario, "\n")
## 
## Skenario terbaik : S4
cat("Skenario terburuk:", worst$skenario, "\n")
## Skenario terburuk: S3