1. PENGATURAN AWAL
# Menentukan seed agar hasil data acak tetap sama saat dijalankan ulang
set.seed(67)
# Menentukan Parameter Sebenarnya
# b0: Peluang dasar (intercept)
# b1: Pengaruh Kecepatan Internet (X1)
# b2: Pengaruh Waktu Masuk Waiting Room (X2)
b0 <- -4.0
b1 <- 0.05
b2 <- 0.08
2. FUNGSI PEMBANGKITAN DATA
# Fungsi ini menerima jumlah sampel (n) dan persentase outlier (p_out)
simulasi_lengkap <- function(n, p_out) {
# a. Membangkitkan Data Normal (X1 dan X2)
internet <- rnorm(n, mean = 50, sd = 10)
waiting_room <- runif(n, min = 0, max = 30)
# b. Menghitung Probabilitas & Generate Y (Biner)
logit <- b0 + (b1 * internet) + (b2 * waiting_room)
prob <- 1 / (1 + exp(-logit))
berhasil <- rbinom(n, size = 1, prob = prob)
# c. Penambahan Outlier (Jika p_out > 0)
# Outlier dibuat dengan membalik hasil (berhasil jadi gagal, gagal jadi berhasil)
n_outlier <- round(n * p_out)
if(n_outlier > 0) {
indeks_acak <- sample(1:n, n_outlier)
berhasil[indeks_acak] <- 1 - berhasil[indeks_acak]
}
# d. Melakukan Estimasi Parameter (GLM) [cite: 11, 49]
data_frame <- data.frame(Y = berhasil, X1 = internet, X2 = waiting_room)
model_estimasi <- glm(Y ~ X1 + X2, data = data_frame, family = binomial)
return(coef(model_estimasi))
}
3. PROSES REPLIKASI
n_rep <- 100
# SKENARIO 1: Pengaruh Ukuran Sampel (Outlier 0%)
n50 <- replicate(n_rep, simulasi_lengkap(50, 0))
n150 <- replicate(n_rep, simulasi_lengkap(150, 0))
n500 <- replicate(n_rep, simulasi_lengkap(500, 0))
# SKENARIO 2: Pengaruh Outlier (Ukuran Sampel Tetap n=500)
out5 <- replicate(n_rep, simulasi_lengkap(500, 0.05))
out10 <- replicate(n_rep, simulasi_lengkap(500, 0.10))
4. EVALUASI BIAS
# Evaluasi Skenario Ukuran Sampel
# --- Untuk Sampel n = 50 ---
bias_b0_n50 <- mean(n50[1, ]) - b0
bias_b1_n50 <- mean(n50[2, ]) - b1
bias_b2_n50 <- mean(n50[3, ]) - b2
# --- Untuk Sampel n = 150 ---
bias_b0_n150 <- mean(n150[1, ]) - b0
bias_b1_n150 <- mean(n150[2, ]) - b1
bias_b2_n150 <- mean(n150[3, ]) - b2
# --- Untuk Sampel n = 500 ---
bias_b0_n500 <- mean(n500[1, ]) - b0
bias_b1_n500 <- mean(n500[2, ]) - b1
bias_b2_n500 <- mean(n500[3, ]) - b2
# Evaluasi Skenario Tingkat Outlier (n=500)
# --- Untuk Outlier 5% ---
bias_b0_out5 <- mean(out5[1, ]) - b0
bias_b1_out5 <- mean(out5[2, ]) - b1
bias_b2_out5 <- mean(out5[3, ]) - b2
# --- Untuk Outlier 10% ---
bias_b0_out10 <- mean(out10[1, ]) - b0
bias_b1_out10 <- mean(out10[2, ]) - b1
bias_b2_out10 <- mean(out10[3, ]) - b2
cat("HASIL PERHITUNGAN BIAS PARAMETER\n")
## HASIL PERHITUNGAN BIAS PARAMETER
cat("--- SKENARIO 1: UKURAN SAMPEL ---\n")
## --- SKENARIO 1: UKURAN SAMPEL ---
cat("n=50 | b0:", bias_b0_n50, "| b1:", bias_b1_n50, "| b2:", bias_b2_n50, "\n")
## n=50 | b0: -0.7489135 | b1: 0.01071193 | b2: 0.008879265
cat("n=150 | b0:", bias_b0_n150, "| b1:", bias_b1_n150, "| b2:", bias_b2_n150, "\n")
## n=150 | b0: -0.08756911 | b1: 0.001108675 | b2: 0.004067383
cat("n=500 | b0:", bias_b0_n500, "| b1:", bias_b1_n500, "| b2:", bias_b2_n500, "\n\n")
## n=500 | b0: 0.02829129 | b1: -0.0002749551 | b2: -0.0008725506
cat("--- SKENARIO 2: TINGKAT OUTLIER (n=500) ---\n")
## --- SKENARIO 2: TINGKAT OUTLIER (n=500) ---
cat("Out 5% | b0:", bias_b0_out5, "| b1:", bias_b1_out5, "| b2:", bias_b2_out5, "\n")
## Out 5% | b0: 0.5361233 | b1: -0.007247473 | b2: -0.008928612
cat("Out 10% | b0:", bias_b0_out10, "| b1:", bias_b1_out10, "| b2:", bias_b2_out10, "\n")
## Out 10% | b0: 1.014819 | b1: -0.01276899 | b2: -0.02013585
5. Visualisasi Boxplot
# 1. Ukuran Sampel
# Plot b1 - Internet
boxplot(n50[2,], n150[2,], n500[2,],
names = c("n50", "n150", "n500"),
main = "Sampel vs b1 (Internet)", col = "lightblue",
ylim = c(-0.1, 0.2),
outline = FALSE)
abline(h = b1, col = "red", lwd = 2, lty = 2)

# Plot b2 - Waiting Room
boxplot(n50[3,], n150[3,], n500[3,],
names = c("n50", "n150", "n500"),
main = "Sampel vs b2 (Waiting Room)", col = "lightgreen",
ylim = c(-0.1, 0.3),
outline = FALSE)
abline(h = b2, col = "red", lwd = 2, lty = 2)

# 2. Outliers
# Plot b1 - Internet
boxplot(n500[2,], out5[2,], out10[2,],
names = c("0%", "5%", "10%"),
main = "Outlier vs b1 (Internet)", col = "orange",
ylim = c(-0.1, 0.2),
outline = FALSE)
abline(h = b1, col = "red", lwd = 2, lty = 2)

# Plot b2 - Waiting Room
boxplot(n500[3,], out5[3,], out10[3,],
names = c("0%", "5%", "10%"),
main = "Outlier vs b2 (Waiting Room)", col = "tomato",
ylim = c(-0.1, 0.3),
outline = FALSE)
abline(h = b2, col = "red", lwd = 2, lty = 2)
