simulate_copula_vs_naive2 <- function(n = 10000,
true_beta = 0.10,
u_strengths = seq(0.1, 0.9, 0.1),
sims = 100,
skew_rate = 0.25,
noise_sd = 0.1,
return_data = FALSE) {
library(dplyr)
library(ggplot2)
library(tidyr)
results <- data.frame()
for (b1 in u_strengths) {
bias_naive <- c()
bias_corrected <- c()
for (i in 1:sims) {
ResX <- rnorm(n, mean = 0, sd = (sqrt(1-(b1^2))))
ResY <- rnorm(n, mean = 0, sd = (sqrt(1-(b1^2+true_beta^2))))
u <- rnorm(n)
x <- b1 * u + ResX
x <- exp(x)
x <- scale(x, center = TRUE, scale = TRUE)
y <- true_beta * x + b1 * u + ResY
df <- data.frame(x = x, y = y)
df$rank_x <- rank(df$x) / (n + 1)
df$copula <- qnorm(df$rank_x)
naive_model <- lm(y ~ x, data = df)
corrected_model <- lm(y ~ x + copula, data = df)
naive_coef <- coef(naive_model)["x"]
corrected_coef <- coef(corrected_model)["x"]
bias_naive[i] <- abs(naive_coef - true_beta)
bias_corrected[i] <- abs(corrected_coef - true_beta)
}
results <- rbind(results,
data.frame(
u_strength = b1,
mean_bias_naive = mean(bias_naive),
mean_bias_copula = mean(bias_corrected)
))
}
results_long <- results %>%
pivot_longer(cols = c(mean_bias_naive, mean_bias_copula),
names_to = "Model",
values_to = "Bias")
if (return_data) {
return(results_long)
} else {
ggplot(results_long, aes(x = u_strength, y = Bias, color = Model, group = Model)) +
geom_line(linewidth = 1.2) +
geom_point(size = 2) +
labs(title = "Bias vs U Confounding Strength",
x = "U strength (Confounding Level)",
y = "Mean Absolute Bias",
color = "Model") +
theme_minimal(base_size = 14) +
scale_color_manual(values = c("mean_bias_naive" = "red",
"mean_bias_copula" = "blue"),
labels = c("Copula", "Naive"))
}
}Clean Copula Simulation
Copula Simulation for Endogeneity Correction
Comparison Between Naive (Unadjusted) and Copula-Adjusted Regression: Multiple Simulated Comparisons Across Confounding Strengths:
simulate_copula_vs_naive2()
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
As the strength of the confounder increases, average bias in the causal estimate increases substantially. However, using the copula term to adjust the regression model keeps average estimate bias extremely low even as the strength of the confounder increases. This method requires a