#' Compare naive and copula-adjusted regression bias across confounding levels
#'
#' For every confounding strength in `u_strengths`, `sims` data sets of size
#' `n` are simulated. In each data set an unobserved confounder `u` drives both
#' the regressor `x` and the outcome `y`. `x` is made skewed by exponentiating
#' it and is then standardized. Two models are fitted: `y ~ x` (naive) and
#' `y ~ x + copula`, where `copula` is the normal quantile of the scaled rank
#' of `x`. The absolute deviation of the estimated `x` coefficient from
#' `true_beta` is averaged over the replicates for each model.
#'
#' Random draws happen in a fixed order (residual of x, residual of y, then
#' u), so results are reproducible after `set.seed()`.
#'
#' @param n Number of observations per simulated data set (at least 2).
#' @param true_beta True coefficient of `x` in the outcome equation.
#' @param u_strengths Numeric vector of confounder loadings. Each value must
#'   satisfy `u_strength^2 + true_beta^2 <= 1` so that the residual standard
#'   deviations are real numbers.
#' @param sims Number of simulated data sets per confounding strength
#'   (at least 1).
#' @param skew_rate Currently unused; kept so existing calls keep working.
#' @param noise_sd Currently unused; kept so existing calls keep working.
#' @param return_data If `TRUE`, return the long-format results instead of the
#'   plot.
#' @return If `return_data` is `TRUE`, the output of `tidyr::pivot_longer()`
#'   with columns `u_strength`, `Model` and `Bias`; otherwise a ggplot object
#'   showing mean absolute bias against confounding strength for both models.
simulate_copula_vs_naive2 <- function(n = 10000,
                                      true_beta = 0.10,
                                      u_strengths = seq(0.1, 0.9, 0.1),
                                      sims = 100,
                                      skew_rate = 0.25,
                                      noise_sd = 0.1,
                                      return_data = FALSE) {
  # Attached inside the function, as in the original, so that code which runs
  # after a call and relies on these packages being attached keeps working.
  library(dplyr)
  library(ggplot2)
  library(tidyr)

  # Fail early with a clear message instead of propagating NaN into lm().
  if (!is.numeric(n) || length(n) != 1L || is.na(n) || n < 2) {
    stop("`n` must be a single number >= 2.", call. = FALSE)
  }
  if (!is.numeric(sims) || length(sims) != 1L || is.na(sims) || sims < 1) {
    stop("`sims` must be a single number >= 1.", call. = FALSE)
  }
  if (!is.numeric(true_beta) || length(true_beta) != 1L || is.na(true_beta)) {
    stop("`true_beta` must be a single non-missing number.", call. = FALSE)
  }
  if (!is.numeric(u_strengths) || length(u_strengths) == 0L ||
        anyNA(u_strengths)) {
    stop("`u_strengths` must be a non-empty numeric vector without NA.",
         call. = FALSE)
  }
  if (any(u_strengths^2 + true_beta^2 > 1)) {
    stop("Every value of `u_strengths` must satisfy ",
         "u_strength^2 + true_beta^2 <= 1.", call. = FALSE)
  }

  # One simulated data set: returns c(naive bias, copula-adjusted bias).
  simulate_once <- function(b1) {
    # Draw order (res_x, res_y, u) is deliberate; keep it for reproducibility.
    res_x <- rnorm(n, mean = 0, sd = sqrt(1 - b1^2))
    res_y <- rnorm(n, mean = 0, sd = sqrt(1 - (b1^2 + true_beta^2)))
    u <- rnorm(n)

    # Skew the regressor, then standardize; as.numeric() drops the matrix
    # structure that scale() returns.
    x <- as.numeric(scale(exp(b1 * u + res_x), center = TRUE, scale = TRUE))
    y <- true_beta * x + b1 * u + res_y

    sim_df <- data.frame(x = x, y = y)
    # Dividing by n + 1 keeps the scaled ranks strictly inside (0, 1), so
    # qnorm() never returns +/-Inf.
    sim_df$rank_x <- rank(sim_df$x) / (n + 1)
    sim_df$copula <- qnorm(sim_df$rank_x)

    naive_model <- lm(y ~ x, data = sim_df)
    corrected_model <- lm(y ~ x + copula, data = sim_df)

    c(
      abs(coef(naive_model)[["x"]] - true_beta),
      abs(coef(corrected_model)[["x"]] - true_beta)
    )
  }

  # One summary row per confounding strength; rows are bound once at the end
  # rather than growing a data frame inside a loop.
  per_strength <- lapply(u_strengths, function(b1) {
    biases <- vapply(
      seq_len(sims),
      function(i) simulate_once(b1),
      numeric(2)
    )
    data.frame(
      u_strength = b1,
      mean_bias_naive = mean(biases[1, ]),
      mean_bias_copula = mean(biases[2, ])
    )
  })
  results <- do.call(rbind, per_strength)

  results_long <- results %>%
    pivot_longer(cols = c(mean_bias_naive, mean_bias_copula),
                 names_to = "Model",
                 values_to = "Bias")

  if (return_data) {
    return(results_long)
  }

  ggplot(results_long,
         aes(x = u_strength, y = Bias, color = Model, group = Model)) +
    geom_line(linewidth = 1.2) +
    geom_point(size = 2) +
    labs(title = "Bias vs U Confounding Strength",
         x = "U strength (Confounding Level)",
         y = "Mean Absolute Bias",
         color = "Model") +
    theme_minimal(base_size = 14) +
    # Explicit breaks tie each label to its series instead of relying on the
    # default alphabetical ordering of the Model values.
    scale_color_manual(values = c("mean_bias_naive" = "red",
                                  "mean_bias_copula" = "blue"),
                       breaks = c("mean_bias_copula", "mean_bias_naive"),
                       labels = c("Copula", "Naive"))
}
Clean Copula Simulation
Copula Simulation for Endogeneity Correction
Comparison Between Naive (Unadjusted) and Copula-Adjusted Regression: Multiple Simulated Comparisons Across Confounding Strengths:
# Run the simulation with all default arguments; since return_data defaults to
# FALSE, the call evaluates to the bias-vs-confounding-strength plot.
simulate_copula_vs_naive2()
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
As the strength of the confounder increases, the average bias of the naive (unadjusted) estimate increases substantially. However, adding the copula term to the regression model keeps the average bias extremely low even as the strength of the confounder increases. This method requires a