3 SKENARIO: Ukuran Sampel, Multikolinearitas, Outlier. Replikasi: 100 dataset per skenario.
# Fix the RNG seed once so every simulated dataset below is reproducible.
set.seed(123)
# True parameters of the data-generating logistic model:
#   logit(P(Y = 1)) = beta0 + beta1 * X1 + beta2 * X2
beta0 <- -1
beta1 <- 2
beta2 <- -1.5
# Generate one simulated dataset for a two-predictor logistic regression.
#
# Arguments:
#   n        Number of observations to draw (single non-negative number).
#   scenario One of:
#              "normal"   - X1 and X2 are independent standard normal draws;
#              "multicol" - X2 is X1 plus small noise (sd = 0.1), so the two
#                           predictors are almost perfectly correlated;
#              "outlier"  - as "normal", but the first (up to) five X1 values
#                           are multiplied by 10.
#   beta     Numeric vector c(intercept, coefficient of X1, coefficient of X2).
#            Defaults to the global beta0, beta1, beta2 (evaluated lazily, so
#            existing two-argument calls behave as before).
#
# Returns:
#   A data.frame with columns Y (0/1 response), X1 and X2, and n rows.
#
# Errors:
#   Stops with an explicit message when `scenario` is not one of the three
#   supported values, instead of failing later on an undefined X1.
generate_data <- function(n, scenario, beta = c(beta0, beta1, beta2)) {
  valid_scenarios <- c("normal", "multicol", "outlier")
  if (!is.character(scenario) || length(scenario) != 1L ||
      !scenario %in% valid_scenarios) {
    stop(
      "unknown scenario: expected one of ",
      paste(valid_scenarios, collapse = ", "),
      call. = FALSE
    )
  }
  stopifnot(
    is.numeric(n), length(n) == 1L, !is.na(n), n >= 0,
    is.numeric(beta), length(beta) == 3L
  )

  # Every scenario draws X1 first and then n further normal deviates for X2,
  # which keeps the random-number stream identical across scenarios.
  X1 <- rnorm(n)
  X2 <- switch(scenario,
    multicol = X1 + rnorm(n, sd = 0.1), # highly correlated with X1
    rnorm(n)
  )

  if (scenario == "outlier") {
    # Inflate at most five observations; clamping to the vector length avoids
    # silently extending X1 with NA when n < 5.
    outlier_idx <- seq_len(min(5L, length(X1)))
    X1[outlier_idx] <- X1[outlier_idx] * 10
  }

  # Success probability via the inverse logit.
  p <- plogis(beta[1] + beta[2] * X1 + beta[3] * X2)
  # Binary response
  Y <- rbinom(n, 1, p)
  data.frame(Y, X1, X2)
}
# Monte Carlo study of the logistic-regression coefficient estimates.
#
# For each replicate a dataset is drawn with generate_data(n, scenario), the
# model Y ~ X1 + X2 is fitted with glm(family = binomial), and the three
# estimated coefficients are stored.
#
# Arguments:
#   n        Sample size of every simulated dataset.
#   scenario Scenario label forwarded unchanged to generate_data().
#   n_rep    Number of simulated datasets (default 100, the value that was
#            previously hard-coded).
#
# Returns:
#   A list with two named numeric vectors (names beta0, beta1, beta2):
#     mean_est - average of the estimates over the replicates;
#     bias     - mean_est minus the true values taken from the global
#                beta0, beta1, beta2.
run_simulation <- function(n, scenario, n_rep = 100) {
  stopifnot(
    is.numeric(n_rep), length(n_rep) == 1L, !is.na(n_rep), n_rep >= 1
  )

  true_beta <- c(beta0 = beta0, beta1 = beta1, beta2 = beta2)

  # One row per replicate, one column per coefficient (preallocated).
  estimates <- matrix(
    NA_real_,
    nrow = n_rep,
    ncol = length(true_beta),
    dimnames = list(NULL, names(true_beta))
  )

  for (i in seq_len(n_rep)) {
    sim_data <- generate_data(n, scenario)
    fit <- glm(Y ~ X1 + X2, data = sim_data, family = binomial)
    estimates[i, ] <- coef(fit)
  }

  # Average estimate per coefficient
  mean_est <- colMeans(estimates)
  # Bias = average estimate - true value (names are inherited from mean_est)
  bias <- mean_est - true_beta

  list(mean_est = mean_est, bias = bias)
}
MENJALANKAN SIMULASI
# All four runs draw from the single RNG stream seeded at the top of the
# script, so the order of these calls determines the reported numbers.
# Scenario 1: sample size (n = 50 vs. n = 500, independent predictors)
sim_small <- run_simulation(50, "normal")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
sim_large <- run_simulation(500, "normal")
# Scenario 2: multicollinearity (n = 200)
sim_multicol <- run_simulation(200, "multicol")
# Scenario 3: outliers (n = 200)
sim_outlier <- run_simulation(200, "outlier")
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
HASIL
# Print the mean estimates and bias of every scenario.
# Lines starting with `##` appear to be console output captured when the
# report was rendered; they are not executed.
cat("===== HASIL SIMULASI =====\n\n")
## ===== HASIL SIMULASI =====
# Scenario 1, small sample (n = 50)
cat("Skenario 1 (n kecil = 50)\n")
## Skenario 1 (n kecil = 50)
print(sim_small)
## $mean_est
## beta0 beta1 beta2
## -1.350182 2.680340 -2.070260
##
## $bias
## beta0 beta1 beta2
## -0.3501820 0.6803405 -0.5702605
# Scenario 1, large sample (n = 500)
cat("\nSkenario 1 (n besar = 500)\n")
##
## Skenario 1 (n besar = 500)
print(sim_large)
## $mean_est
## beta0 beta1 beta2
## -0.9851206 1.9991595 -1.5168550
##
## $bias
## beta0 beta1 beta2
## 0.0148794086 -0.0008404967 -0.0168549626
# Scenario 2, multicollinearity
cat("\nSkenario 2 (Multikolinearitas)\n")
##
## Skenario 2 (Multikolinearitas)
print(sim_multicol)
## $mean_est
## beta0 beta1 beta2
## -0.9980428 2.0702489 -1.5900828
##
## $bias
## beta0 beta1 beta2
## 0.001957181 0.070248949 -0.090082808
# Scenario 3, outliers
cat("\nSkenario 3 (Outlier)\n")
##
## Skenario 3 (Outlier)
print(sim_outlier)
## $mean_est
## beta0 beta1 beta2
## -1.026408 2.047408 -1.518578
##
## $bias
## beta0 beta1 beta2
## -0.02640843 0.04740810 -0.01857830
INTERPRETASI
# Print the written interpretation of the simulation results.
# NOTE(review): the text below talks about variability/stability of the
# estimates, but run_simulation() returns only the mean estimate and the
# bias; consider also reporting a spread measure (e.g. SD) to back this up.
cat("\n===== INTERPRETASI =====\n")
##
## ===== INTERPRETASI =====
cat("
Dari hasil simulasi terlihat bahwa:
Pada n kecil (50), hasil estimasi lebih bervariasi dan biasnya lebih besar.
Saat n diperbesar (500), estimasi jadi lebih stabil dan mendekati parameter asli.
Pada kasus multikolinearitas, nilai koefisien cenderung tidak stabil, terutama pada beta1 dan beta2.
Adanya outlier juga mempengaruhi hasil estimasi, meskipun dampaknya tidak sebesar multikolinearitas.
Secara keseluruhan, skenario dengan sampel besar memberikan hasil terbaik.
")
##
## Dari hasil simulasi terlihat bahwa:
## Pada n kecil (50), hasil estimasi lebih bervariasi dan biasnya lebih besar.
## Saat n diperbesar (500), estimasi jadi lebih stabil dan mendekati parameter asli.
## Pada kasus multikolinearitas, nilai koefisien cenderung tidak stabil, terutama pada beta1 dan beta2.
## Adanya outlier juga mempengaruhi hasil estimasi, meskipun dampaknya tidak sebesar multikolinearitas.
## Secara keseluruhan, skenario dengan sampel besar memberikan hasil terbaik.