1. PENGATURAN AWAL

# Menentukan seed agar hasil data acak tetap sama saat dijalankan ulang
set.seed(67)
# Menentukan Parameter Sebenarnya 
# b0: Peluang dasar (intercept)
# b1: Pengaruh Kecepatan Internet (X1)
# b2: Pengaruh Waktu Masuk Waiting Room (X2)
b0 <- -4.0
b1 <- 0.05
b2 <- 0.08

2. FUNGSI PEMBANGKITAN DATA

# Fungsi ini menerima jumlah sampel (n) dan persentase outlier (p_out)
simulasi_lengkap <- function(n, p_out) {
  
  # a. Membangkitkan Data Normal (X1 dan X2)
  internet <- rnorm(n, mean = 50, sd = 10)     
  waiting_room <- runif(n, min = 0, max = 30)  
  
  # b. Menghitung Probabilitas & Generate Y (Biner) 
  logit <- b0 + (b1 * internet) + (b2 * waiting_room)
  prob  <- 1 / (1 + exp(-logit))
  berhasil <- rbinom(n, size = 1, prob = prob)
  
  # c. Penambahan Outlier (Jika p_out > 0) 
  # Outlier dibuat dengan membalik hasil (berhasil jadi gagal, gagal jadi berhasil)
  n_outlier <- round(n * p_out)
  if(n_outlier > 0) {
    indeks_acak <- sample(1:n, n_outlier)
    berhasil[indeks_acak] <- 1 - berhasil[indeks_acak] 
  }
  
  # d. Melakukan Estimasi Parameter (GLM) [cite: 11, 49]
  data_frame <- data.frame(Y = berhasil, X1 = internet, X2 = waiting_room)
  model_estimasi <- glm(Y ~ X1 + X2, data = data_frame, family = binomial)
  
  return(coef(model_estimasi))
}

3. PROSES REPLIKASI

n_rep <- 100

# SKENARIO 1: Pengaruh Ukuran Sampel (Outlier 0%) 
n50  <- replicate(n_rep, simulasi_lengkap(50, 0))
n150 <- replicate(n_rep, simulasi_lengkap(150, 0))
n500 <- replicate(n_rep, simulasi_lengkap(500, 0))

# SKENARIO 2: Pengaruh Outlier (Ukuran Sampel Tetap n=500) 
out5  <- replicate(n_rep, simulasi_lengkap(500, 0.05))
out10 <- replicate(n_rep, simulasi_lengkap(500, 0.10))

4. EVALUASI BIAS

#  Evaluasi Skenario Ukuran Sampel
# --- Untuk Sampel n = 50 ---
bias_b0_n50 <- mean(n50[1, ]) - b0
bias_b1_n50 <- mean(n50[2, ]) - b1
bias_b2_n50 <- mean(n50[3, ]) - b2

# --- Untuk Sampel n = 150 ---
bias_b0_n150 <- mean(n150[1, ]) - b0
bias_b1_n150 <- mean(n150[2, ]) - b1
bias_b2_n150 <- mean(n150[3, ]) - b2

# --- Untuk Sampel n = 500 ---
bias_b0_n500 <- mean(n500[1, ]) - b0
bias_b1_n500 <- mean(n500[2, ]) - b1
bias_b2_n500 <- mean(n500[3, ]) - b2

# Evaluasi Skenario Tingkat Outlier (n=500)
# --- Untuk Outlier 5% ---
bias_b0_out5 <- mean(out5[1, ]) - b0
bias_b1_out5 <- mean(out5[2, ]) - b1
bias_b2_out5 <- mean(out5[3, ]) - b2

# --- Untuk Outlier 10% ---
bias_b0_out10 <- mean(out10[1, ]) - b0
bias_b1_out10 <- mean(out10[2, ]) - b1
bias_b2_out10 <- mean(out10[3, ]) - b2


cat("HASIL PERHITUNGAN BIAS PARAMETER\n")
## HASIL PERHITUNGAN BIAS PARAMETER
cat("--- SKENARIO 1: UKURAN SAMPEL ---\n")
## --- SKENARIO 1: UKURAN SAMPEL ---
cat("n=50  | b0:", bias_b0_n50, "| b1:", bias_b1_n50, "| b2:", bias_b2_n50, "\n")
## n=50  | b0: -0.7489135 | b1: 0.01071193 | b2: 0.008879265
cat("n=150 | b0:", bias_b0_n150, "| b1:", bias_b1_n150, "| b2:", bias_b2_n150, "\n")
## n=150 | b0: -0.08756911 | b1: 0.001108675 | b2: 0.004067383
cat("n=500 | b0:", bias_b0_n500, "| b1:", bias_b1_n500, "| b2:", bias_b2_n500, "\n\n")
## n=500 | b0: 0.02829129 | b1: -0.0002749551 | b2: -0.0008725506
cat("--- SKENARIO 2: TINGKAT OUTLIER (n=500) ---\n")
## --- SKENARIO 2: TINGKAT OUTLIER (n=500) ---
cat("Out 5%  | b0:", bias_b0_out5, "| b1:", bias_b1_out5, "| b2:", bias_b2_out5, "\n")
## Out 5%  | b0: 0.5361233 | b1: -0.007247473 | b2: -0.008928612
cat("Out 10% | b0:", bias_b0_out10, "| b1:", bias_b1_out10, "| b2:", bias_b2_out10, "\n")
## Out 10% | b0: 1.014819 | b1: -0.01276899 | b2: -0.02013585

5. Visualisasi Boxplot

# 1. Ukuran Sampel

# Plot b1 - Internet 
boxplot(n50[2,], n150[2,], n500[2,], 
        names = c("n50", "n150", "n500"), 
        main = "Sampel vs b1 (Internet)", col = "lightblue",
        ylim = c(-0.1, 0.2),
        outline = FALSE)      
abline(h = b1, col = "red", lwd = 2, lty = 2)

# Plot b2 - Waiting Room 
boxplot(n50[3,], n150[3,], n500[3,], 
        names = c("n50", "n150", "n500"), 
        main = "Sampel vs b2 (Waiting Room)", col = "lightgreen",
        ylim = c(-0.1, 0.3),  
        outline = FALSE)
abline(h = b2, col = "red", lwd = 2, lty = 2)

# 2. Outliers

# Plot b1 - Internet 
boxplot(n500[2,], out5[2,], out10[2,], 
        names = c("0%", "5%", "10%"), 
        main = "Outlier vs b1 (Internet)", col = "orange",
        ylim = c(-0.1, 0.2), 
        outline = FALSE)
abline(h = b1, col = "red", lwd = 2, lty = 2)

# Plot b2 - Waiting Room 
boxplot(n500[3,], out5[3,], out10[3,], 
        names = c("0%", "5%", "10%"), 
        main = "Outlier vs b2 (Waiting Room)", col = "tomato",
        ylim = c(-0.1, 0.3), 
        outline = FALSE)
abline(h = b2, col = "red", lwd = 2, lty = 2)