#1. Model dan Parameter Awal
Model yang digunakan adalah regresi logistik biner:
log(𝑝/1−𝑝)=𝛽0 +𝛽1𝑋1 +𝛽2𝑋2
Dengan parameter sebenarnya: β₀ = -1 β₁ = 0.8 β₂ = -0.5
#2. Pembangkitan Data Simulasi
set.seed(123)
generate_data <- function(n) {
X1 <- rnorm(n, 0, 1)
X2 <- rnorm(n, 0, 1)
beta0 <- -1
beta1 <- 0.8
beta2 <- -0.5
p <- exp(beta0 + beta1*X1 + beta2*X2) /
(1 + exp(beta0 + beta1*X1 + beta2*X2))
Y <- rbinom(n, 1, p)
data.frame(Y, X1, X2)
}
#3. Skenario Simulasi
#a. ukuran sampel n = 50 (kecil) n = 100 (sedang) n = 500 (besar)
#b. multikolinearitas
generate_data_multi <- function(n) {
X1 <- rnorm(n)
X2 <- X1 + rnorm(n, sd = 0.1)
beta0 <- -1
beta1 <- 0.8
beta2 <- -0.5
p <- exp(beta0 + beta1*X1 + beta2*X2) /
(1 + exp(beta0 + beta1*X1 + beta2*X2))
Y <- rbinom(n, 1, p)
data.frame(Y, X1, X2)
}
#c. outlier
generate_data_outlier <- function(n) {
data <- generate_data(n)
data$X1[1:5] <- data$X1[1:5] * 10
return(data)
}
#4. Replikasi Simulasi
simulate <- function(gen_func, n, n_rep=100){
results <- matrix(NA, nrow=n_rep, ncol=3)
for(i in 1:n_rep){
data <- gen_func(n)
model <- glm(Y ~ X1 + X2, data=data, family=binomial)
results[i,] <- coef(model)
}
colnames(results) <- c("beta0","beta1","beta2")
return(results)
}
#5. Estimasi Parameter
analyze <- function(results){
true <- c(-1, 0.8, -0.5)
mean_est <- colMeans(results)
bias <- mean_est - true
data.frame(
Parameter = c("beta0","beta1","beta2"),
Mean_Estimate = mean_est,
True_Value = true,
Bias = bias
)
}
#6. Evaluasi dan Analisis
#a. ukuran sampel
n_values <- c(50, 100, 500)
hasil_ukuran <- do.call(rbind, lapply(n_values, function(n){
analisis <- analyze(simulate(generate_data, n))
analisis$Skenario <- paste("Ukuran Sampel", n)
analisis
}))
hasil_ukuran
## Parameter Mean_Estimate True_Value Bias Skenario
## beta0 beta0 -1.1107482 -1.0 -0.110748174 Ukuran Sampel 50
## beta1 beta1 0.9612801 0.8 0.161280144 Ukuran Sampel 50
## beta2 beta2 -0.6005512 -0.5 -0.100551166 Ukuran Sampel 50
## beta01 beta0 -1.0544074 -1.0 -0.054407439 Ukuran Sampel 100
## beta11 beta1 0.8227361 0.8 0.022736111 Ukuran Sampel 100
## beta21 beta2 -0.5569477 -0.5 -0.056947730 Ukuran Sampel 100
## beta02 beta0 -0.9980091 -1.0 0.001990933 Ukuran Sampel 500
## beta12 beta1 0.7946960 0.8 -0.005303964 Ukuran Sampel 500
## beta22 beta2 -0.5128277 -0.5 -0.012827661 Ukuran Sampel 500
#b. multikolinearitas & outlier
multi <- analyze(simulate(generate_data_multi, 100))
multi$Skenario <- "Multikolinearitas"
outlier <- analyze(simulate(generate_data_outlier, 100))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
outlier$Skenario <- "Outlier"
multi
## Parameter Mean_Estimate True_Value Bias Skenario
## beta0 beta0 -1.0605226 -1.0 -0.06052262 Multikolinearitas
## beta1 beta1 0.7817970 0.8 -0.01820297 Multikolinearitas
## beta2 beta2 -0.4341459 -0.5 0.06585410 Multikolinearitas
outlier
## Parameter Mean_Estimate True_Value Bias Skenario
## beta0 beta0 -0.9939397 -1.0 0.006060296 Outlier
## beta1 beta1 0.4195658 0.8 -0.380434216 Outlier
## beta2 beta2 -0.5182464 -0.5 -0.018246353 Outlier
#c. perbandingan semua skenario
hasil_semua <- rbind(hasil_ukuran, multi, outlier)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
hasil_semua$Abs_Bias <- abs(hasil_semua$Bias)
perbandingan <- hasil_semua %>%
group_by(Skenario) %>%
summarise(mean_bias = mean(Abs_Bias))
perbandingan
## # A tibble: 5 × 2
## Skenario mean_bias
## <chr> <dbl>
## 1 Multikolinearitas 0.0482
## 2 Outlier 0.135
## 3 Ukuran Sampel 100 0.0447
## 4 Ukuran Sampel 50 0.124
## 5 Ukuran Sampel 500 0.00671
#d. penentuan bias terbesar & terbaik
bias_terbesar <- perbandingan[which.max(perbandingan$mean_bias), ]
bias_terkecil <- perbandingan[which.min(perbandingan$mean_bias), ]
bias_terbesar
## # A tibble: 1 × 2
## Skenario mean_bias
## <chr> <dbl>
## 1 Outlier 0.135
bias_terkecil
## # A tibble: 1 × 2
## Skenario mean_bias
## <chr> <dbl>
## 1 Ukuran Sampel 500 0.00671
#7. Penyajian Hasil
#a. tabel ringkasan
perbandingan
## # A tibble: 5 × 2
## Skenario mean_bias
## <chr> <dbl>
## 1 Multikolinearitas 0.0482
## 2 Outlier 0.135
## 3 Ukuran Sampel 100 0.0447
## 4 Ukuran Sampel 50 0.124
## 5 Ukuran Sampel 500 0.00671
#b. grafik
barplot(perbandingan$mean_bias,
names.arg = perbandingan$Skenario,
las = 2,
col = "lightblue",
ylab = "Rata-rata Bias",
main = "Perbandingan Bias Antar Skenario")