Clean Copula Simulation

Copula Simulation for Endogeneity Correction

Comparison Between Naive (Unadjusted) and Copula-Adjusted Regression: Multiple Simulated Comparisons Across Confounding Strengths:

simulate_copula_vs_naive2 <- function(n = 10000, 
                                     true_beta = 0.10, 
                                     u_strengths = seq(0.1, 0.9, 0.1), 
                                     sims = 100,
                                     skew_rate = 0.25,
                                     noise_sd = 0.1,
                                     return_data = FALSE) {
  library(dplyr)
  library(ggplot2)
  library(tidyr)
  
  results <- data.frame()
  
  for (b1 in u_strengths) {
    bias_naive <- c()
    bias_corrected <- c()
    
    for (i in 1:sims) {
      ResX <- rnorm(n, mean = 0, sd = (sqrt(1-(b1^2))))
      ResY <- rnorm(n, mean = 0, sd = (sqrt(1-(b1^2+true_beta^2))))
      u <- rnorm(n)
      x <- b1 * u + ResX
      x <- exp(x)
      x <- scale(x, center = TRUE, scale = TRUE)
      y <- true_beta * x + b1 * u + ResY
      
      df <- data.frame(x = x, y = y)
      df$rank_x <- rank(df$x) / (n + 1)
      df$copula <- qnorm(df$rank_x)
      
      naive_model <- lm(y ~ x, data = df)
      corrected_model <- lm(y ~ x + copula, data = df)
      
      naive_coef <- coef(naive_model)["x"]
      corrected_coef <- coef(corrected_model)["x"]
      
      bias_naive[i] <- abs(naive_coef - true_beta)
      bias_corrected[i] <- abs(corrected_coef - true_beta)
    }
    
    results <- rbind(results,
                     data.frame(
                       u_strength = b1,
                       mean_bias_naive = mean(bias_naive),
                       mean_bias_copula = mean(bias_corrected)
                     ))
  }
  
  results_long <- results %>%
    pivot_longer(cols = c(mean_bias_naive, mean_bias_copula),
                 names_to = "Model",
                 values_to = "Bias")
  
  if (return_data) {
    return(results_long)  
  } else {

    ggplot(results_long, aes(x = u_strength, y = Bias, color = Model, group = Model)) +
      geom_line(linewidth = 1.2) +
      geom_point(size = 2) +
      labs(title = "Bias vs U Confounding Strength",
           x = "U strength (Confounding Level)",
           y = "Mean Absolute Bias",
           color = "Model") +
      theme_minimal(base_size = 14) +
      scale_color_manual(values = c("mean_bias_naive" = "red",
                                    "mean_bias_copula" = "blue"),
                         labels = c("Copula", "Naive"))
  }
}

simulate_copula_vs_naive2()


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

As the strength of the confounder increases, average bias in the causal estimate increases substantially. However, using the copula term to adjust the regression model keeps average estimate bias extremely low even as the strength of the confounder increases. This method requires a