Model Estimation and Simulation using Logistic Regression

set.seed(1)

beta0 <- -1
beta1 <- 0.8
beta2 <- -0.5

true_beta <- c(beta0, beta1, beta2)

n <- 100
n_sim <- 500

# Scenario 1: Normal 
sim_normal <- function(){
  x1 <- rnorm(n)
  x2 <- rnorm(n)
  
  p <- 1 / (1 + exp(-(beta0 + beta1*x1 + beta2*x2)))
  y <- rbinom(n, 1, p)
  
  coef(glm(y ~ x1 + x2, family = binomial))
}

# Scenario 2: Multicollinearity
sim_multi <- function(){
  x1 <- rnorm(n)
  x2 <- x1 + rnorm(n, 0, 0.1)   # highly correlated
  
  p <- 1 / (1 + exp(-(beta0 + beta1*x1 + beta2*x2)))
  y <- rbinom(n, 1, p)
  
  coef(glm(y ~ x1 + x2, family = binomial))
}

# Scenario 3: Outliers
sim_outlier <- function(){
  x1 <- rnorm(n)
  x2 <- rnorm(n)
  
  # add outliers
  x1[1:5] <- x1[1:5] * 10
  
  p <- 1 / (1 + exp(-(beta0 + beta1*x1 + beta2*x2)))
  y <- rbinom(n, 1, p)
  
  coef(glm(y ~ x1 + x2, family = binomial))
}

est_normal  <- t(replicate(n_sim, sim_normal()))
est_multi   <- t(replicate(n_sim, sim_multi()))
est_outlier <- t(replicate(n_sim, sim_outlier()))

## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred

colnames(est_normal)  <- c("beta0","beta1","beta2")
colnames(est_multi)   <- c("beta0","beta1","beta2")
colnames(est_outlier) <- c("beta0","beta1","beta2")

compute_metrics <- function(estimates, true_beta){
  
  mean_est <- colMeans(estimates)
  bias     <- mean_est - true_beta
  var_est  <- apply(estimates, 2, var)
  mse      <- bias^2 + var_est
  
  data.frame(
    Parameter = c("beta0","beta1","beta2"),
    True = true_beta,
    Mean = mean_est,
    Bias = bias,
    Variance = var_est,
    MSE = mse
  )
}

result_normal  <- compute_metrics(est_normal, true_beta)
result_multi   <- compute_metrics(est_multi, true_beta)
result_outlier <- compute_metrics(est_outlier, true_beta)

print(result_normal)

##       Parameter True       Mean        Bias   Variance        MSE
## beta0     beta0 -1.0 -1.0482006 -0.04820055 0.08063500 0.08295829
## beta1     beta1  0.8  0.8438121  0.04381205 0.08846026 0.09037976
## beta2     beta2 -0.5 -0.5407493 -0.04074934 0.07876834 0.08042885

print(result_multi)

##       Parameter True       Mean         Bias   Variance        MSE
## beta0     beta0 -1.0 -1.0562223 -0.056222348 0.05341556 0.05657652
## beta1     beta1  0.8  0.7916456 -0.008354391 5.49843047 5.49850027
## beta2     beta2 -0.5 -0.4655978  0.034402166 5.40579820 5.40698171

print(result_outlier)

##       Parameter True       Mean        Bias   Variance        MSE
## beta0     beta0 -1.0 -1.0629953 -0.06299529 0.07331207 0.07728047
## beta1     beta1  0.8  0.8611863  0.06118634 0.07742440 0.08116817
## beta2     beta2 -0.5 -0.5450172 -0.04501723 0.07420772 0.07623427

Model Estimation and Simulation using Logistic Regression

Taufik Dwi Ferdiansyah

2026-04-17