#1. Model dan Parameter Awal
Model yang digunakan adalah regresi logistik biner:

log(𝑝/1−𝑝)=𝛽0 +𝛽1𝑋1 +𝛽2𝑋2

Dengan parameter sebenarnya: β₀ = -1 β₁ = 0.8 β₂ = -0.5

#2. Pembangkitan Data Simulasi

set.seed(123)

generate_data <- function(n) {
  X1 <- rnorm(n, 0, 1)
  X2 <- rnorm(n, 0, 1)
  
  beta0 <- -1
  beta1 <- 0.8
  beta2 <- -0.5
  
  p <- exp(beta0 + beta1*X1 + beta2*X2) / 
       (1 + exp(beta0 + beta1*X1 + beta2*X2))
  
  Y <- rbinom(n, 1, p)
  
  data.frame(Y, X1, X2)
}

#3. Skenario Simulasi
#a. ukuran sampel n = 50 (kecil) n = 100 (sedang) n = 500 (besar)

#b. multikolinearitas

generate_data_multi <- function(n) {
  X1 <- rnorm(n)
  X2 <- X1 + rnorm(n, sd = 0.1)
  
  beta0 <- -1
  beta1 <- 0.8
  beta2 <- -0.5
  
  p <- exp(beta0 + beta1*X1 + beta2*X2) / 
       (1 + exp(beta0 + beta1*X1 + beta2*X2))
  
  Y <- rbinom(n, 1, p)
  
  data.frame(Y, X1, X2)
}

#c. outlier

generate_data_outlier <- function(n) {
  data <- generate_data(n)
  data$X1[1:5] <- data$X1[1:5] * 10
  return(data)
}

#4. Replikasi Simulasi

simulate <- function(gen_func, n, n_rep=100){
  results <- matrix(NA, nrow=n_rep, ncol=3)
  
  for(i in 1:n_rep){
    data <- gen_func(n)
    model <- glm(Y ~ X1 + X2, data=data, family=binomial)
    results[i,] <- coef(model)
  }
  
  colnames(results) <- c("beta0","beta1","beta2")
  return(results)
}

#5. Estimasi Parameter

analyze <- function(results){
  true <- c(-1, 0.8, -0.5)
  
  mean_est <- colMeans(results)
  bias <- mean_est - true
  
  data.frame(
    Parameter = c("beta0","beta1","beta2"),
    Mean_Estimate = mean_est,
    True_Value = true,
    Bias = bias
  )
}

#6. Evaluasi dan Analisis
#a. ukuran sampel

n_values <- c(50, 100, 500)

hasil_ukuran <- do.call(rbind, lapply(n_values, function(n){
  analisis <- analyze(simulate(generate_data, n))
  analisis$Skenario <- paste("Ukuran Sampel", n)
  analisis
}))

hasil_ukuran
##        Parameter Mean_Estimate True_Value         Bias          Skenario
## beta0      beta0    -1.1107482       -1.0 -0.110748174  Ukuran Sampel 50
## beta1      beta1     0.9612801        0.8  0.161280144  Ukuran Sampel 50
## beta2      beta2    -0.6005512       -0.5 -0.100551166  Ukuran Sampel 50
## beta01     beta0    -1.0544074       -1.0 -0.054407439 Ukuran Sampel 100
## beta11     beta1     0.8227361        0.8  0.022736111 Ukuran Sampel 100
## beta21     beta2    -0.5569477       -0.5 -0.056947730 Ukuran Sampel 100
## beta02     beta0    -0.9980091       -1.0  0.001990933 Ukuran Sampel 500
## beta12     beta1     0.7946960        0.8 -0.005303964 Ukuran Sampel 500
## beta22     beta2    -0.5128277       -0.5 -0.012827661 Ukuran Sampel 500

#b. multikolinearitas & outlier

multi <- analyze(simulate(generate_data_multi, 100))
multi$Skenario <- "Multikolinearitas"

outlier <- analyze(simulate(generate_data_outlier, 100))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
outlier$Skenario <- "Outlier"

multi
##       Parameter Mean_Estimate True_Value        Bias          Skenario
## beta0     beta0    -1.0605226       -1.0 -0.06052262 Multikolinearitas
## beta1     beta1     0.7817970        0.8 -0.01820297 Multikolinearitas
## beta2     beta2    -0.4341459       -0.5  0.06585410 Multikolinearitas
outlier
##       Parameter Mean_Estimate True_Value         Bias Skenario
## beta0     beta0    -0.9939397       -1.0  0.006060296  Outlier
## beta1     beta1     0.4195658        0.8 -0.380434216  Outlier
## beta2     beta2    -0.5182464       -0.5 -0.018246353  Outlier

#c. perbandingan semua skenario

hasil_semua <- rbind(hasil_ukuran, multi, outlier)

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
hasil_semua$Abs_Bias <- abs(hasil_semua$Bias)

perbandingan <- hasil_semua %>%
  group_by(Skenario) %>%
  summarise(mean_bias = mean(Abs_Bias))

perbandingan
## # A tibble: 5 × 2
##   Skenario          mean_bias
##   <chr>                 <dbl>
## 1 Multikolinearitas   0.0482 
## 2 Outlier             0.135  
## 3 Ukuran Sampel 100   0.0447 
## 4 Ukuran Sampel 50    0.124  
## 5 Ukuran Sampel 500   0.00671

#d. penentuan bias terbesar & terbaik

bias_terbesar <- perbandingan[which.max(perbandingan$mean_bias), ]
bias_terkecil <- perbandingan[which.min(perbandingan$mean_bias), ]

bias_terbesar
## # A tibble: 1 × 2
##   Skenario mean_bias
##   <chr>        <dbl>
## 1 Outlier      0.135
bias_terkecil
## # A tibble: 1 × 2
##   Skenario          mean_bias
##   <chr>                 <dbl>
## 1 Ukuran Sampel 500   0.00671

#7. Penyajian Hasil
#a. tabel ringkasan

perbandingan
## # A tibble: 5 × 2
##   Skenario          mean_bias
##   <chr>                 <dbl>
## 1 Multikolinearitas   0.0482 
## 2 Outlier             0.135  
## 3 Ukuran Sampel 100   0.0447 
## 4 Ukuran Sampel 50    0.124  
## 5 Ukuran Sampel 500   0.00671

#b. grafik

barplot(perbandingan$mean_bias,
        names.arg = perbandingan$Skenario,
        las = 2,
        col = "lightblue",
        ylab = "Rata-rata Bias",
        main = "Perbandingan Bias Antar Skenario")