# ==================================================
# SIMULASI REGRESI LOGISTIK
# UTS Pemodelan Statistika dan Simulasi
# ==================================================
# 1. SETUP ----
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Set seed untuk reproduksibilitas
set.seed(2025)
# 2. PARAMETER TRUE ----
beta0_true <- 0.5
beta1_true <- 1.0
beta2_true <- -0.8
# 3. FUNGSI GENERATE DATA ----
generate_data <- function(n, outlier_pct = 0) {
# Bangkitkan prediktor
X1 <- rnorm(n, 0, 1)
X2 <- rnorm(n, 0, 1)
# Tambahkan outlier pada X1 jika diperlukan
if (outlier_pct > 0) {
n_out <- floor(n * outlier_pct / 100)
if (n_out > 0) {
idx <- sample(1:n, n_out)
X1[idx] <- rnorm(n_out, mean = 5, sd = 1) # outlier ekstrim
}
}
# Hitung probabilitas dan bangkitkan Y
lin_pred <- beta0_true + beta1_true * X1 + beta2_true * X2
prob <- 1 / (1 + exp(-lin_pred))
Y <- rbinom(n, 1, prob)
return(data.frame(Y = Y, X1 = X1, X2 = X2))
}
# 4. SKENARIO SIMULASI ----
n_vals <- c(50, 100, 200) # ukuran sampel
outlier_vals <- c(0, 5, 10) # persentase outlier
R <- 50 # jumlah replikasi
# 5. LOOP SIMULASI ----
hasil <- data.frame()
for (n in n_vals) {
for (out_pct in outlier_vals) {
for (r in 1:R) {
# Bangkitkan data
data <- generate_data(n, out_pct)
# Estimasi parameter dengan MLE
fit <- glm(Y ~ X1 + X2, data = data, family = binomial)
coefs <- coef(fit)
# Simpan hasil
hasil <- rbind(hasil, data.frame(
n = n,
outlier_pct = out_pct,
replikasi = r,
beta0_est = coefs[1],
beta1_est = coefs[2],
beta2_est = coefs[3]
))
}
}
}
# 6. HITUNG BIAS ----
hasil_bias <- hasil %>%
mutate(
bias0 = beta0_est - beta0_true,
bias1 = beta1_est - beta1_true,
bias2 = beta2_est - beta2_true
)
# 7. RINGKASAN PER SKENARIO ----
ringkasan <- hasil_bias %>%
group_by(n, outlier_pct) %>%
summarise(
Bias_beta0 = mean(bias0, na.rm = TRUE),
Bias_beta1 = mean(bias1, na.rm = TRUE),
Bias_beta2 = mean(bias2, na.rm = TRUE),
.groups = "drop"
)
# Tampilkan hasil
print("=== RINGKASAN BIAS PER SKENARIO ===")
## [1] "=== RINGKASAN BIAS PER SKENARIO ==="
print(ringkasan)
## # A tibble: 9 × 5
## n outlier_pct Bias_beta0 Bias_beta1 Bias_beta2
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 50 0 0.0495 0.204 -0.119
## 2 50 5 -0.0430 0.124 0.0280
## 3 50 10 0.0634 0.240 -0.0954
## 4 100 0 -0.0332 0.0237 -0.0797
## 5 100 5 -0.0187 0.0600 -0.0301
## 6 100 10 0.0228 0.0307 0.0160
## 7 200 0 0.0400 0.0332 -0.00849
## 8 200 5 0.0511 0.0445 -0.0687
## 9 200 10 0.0324 0.0455 -0.0554
# 8. VISUALISASI ----
# Grafik bias beta1
ggplot(ringkasan, aes(x = factor(n), y = Bias_beta1, fill = factor(outlier_pct))) +
geom_col(position = "dodge", width = 0.7) +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
labs(
title = "Bias Estimasi Beta1 berdasarkan Ukuran Sampel dan Outlier",
x = "Ukuran Sampel (n)",
y = "Rata-rata Bias",
fill = "Outlier (%)"
) +
theme_minimal() +
theme(legend.position = "bottom")
