set.seed(1)
beta0 <- -1
beta1 <- 0.8
beta2 <- -0.5
true_beta <- c(beta0, beta1, beta2)
n <- 100
n_sim <- 500
# Scenario 1: Normal
sim_normal <- function(){
x1 <- rnorm(n)
x2 <- rnorm(n)
p <- 1 / (1 + exp(-(beta0 + beta1*x1 + beta2*x2)))
y <- rbinom(n, 1, p)
coef(glm(y ~ x1 + x2, family = binomial))
}
# Scenario 2: Multicollinearity
sim_multi <- function(){
x1 <- rnorm(n)
x2 <- x1 + rnorm(n, 0, 0.1) # highly correlated
p <- 1 / (1 + exp(-(beta0 + beta1*x1 + beta2*x2)))
y <- rbinom(n, 1, p)
coef(glm(y ~ x1 + x2, family = binomial))
}
# Scenario 3: Outliers
sim_outlier <- function(){
x1 <- rnorm(n)
x2 <- rnorm(n)
# add outliers
x1[1:5] <- x1[1:5] * 10
p <- 1 / (1 + exp(-(beta0 + beta1*x1 + beta2*x2)))
y <- rbinom(n, 1, p)
coef(glm(y ~ x1 + x2, family = binomial))
}
est_normal <- t(replicate(n_sim, sim_normal()))
est_multi <- t(replicate(n_sim, sim_multi()))
est_outlier <- t(replicate(n_sim, sim_outlier()))
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
colnames(est_normal) <- c("beta0","beta1","beta2")
colnames(est_multi) <- c("beta0","beta1","beta2")
colnames(est_outlier) <- c("beta0","beta1","beta2")
compute_metrics <- function(estimates, true_beta){
mean_est <- colMeans(estimates)
bias <- mean_est - true_beta
var_est <- apply(estimates, 2, var)
mse <- bias^2 + var_est
data.frame(
Parameter = c("beta0","beta1","beta2"),
True = true_beta,
Mean = mean_est,
Bias = bias,
Variance = var_est,
MSE = mse
)
}
result_normal <- compute_metrics(est_normal, true_beta)
result_multi <- compute_metrics(est_multi, true_beta)
result_outlier <- compute_metrics(est_outlier, true_beta)
print(result_normal)
## Parameter True Mean Bias Variance MSE
## beta0 beta0 -1.0 -1.0482006 -0.04820055 0.08063500 0.08295829
## beta1 beta1 0.8 0.8438121 0.04381205 0.08846026 0.09037976
## beta2 beta2 -0.5 -0.5407493 -0.04074934 0.07876834 0.08042885
print(result_multi)
## Parameter True Mean Bias Variance MSE
## beta0 beta0 -1.0 -1.0562223 -0.056222348 0.05341556 0.05657652
## beta1 beta1 0.8 0.7916456 -0.008354391 5.49843047 5.49850027
## beta2 beta2 -0.5 -0.4655978 0.034402166 5.40579820 5.40698171
print(result_outlier)
## Parameter True Mean Bias Variance MSE
## beta0 beta0 -1.0 -1.0629953 -0.06299529 0.07331207 0.07728047
## beta1 beta1 0.8 0.8611863 0.06118634 0.07742440 0.08116817
## beta2 beta2 -0.5 -0.5450172 -0.04501723 0.07420772 0.07623427