References
Load packages
library(magrittr)
library(doMC) # parallel backend to foreach/plyr
registerDoMC() # Turn on multicore processing
options(cores = 4, mc.cores = 4) # Configuration for parallel package
Compare marginal effect from interaction model and no interaction model
## Simulator for a single iteration
Sim <- function(n = 10^4, Zprev = 0.3, XprevZ1 = 0.7, XprevZ0 = 0.3,
beta0 = 1.5, beta1 = 1.0, beta2 = 2.0, beta3 = 0.5, sigma = 0.4) {
## Z is both a confounder and effect modifier
## Prevalence of Z = 1 is Zprev
dat <- data.frame(Z = rbinom(n = n, size = 1, prob = Zprev))
## Treatment X is more or less likely if Z = 1
dat$X <- rbinom(n = n, size = 1, prob = XprevZ1*dat$Z + XprevZ0*(1 - dat$Z))
## True data-generating model for the outcome
dat$Y <- with(dat, (beta0 + beta1*X + beta2*Z + beta3*X*Z + rnorm(n = n, 0, sigma)))
## True marginal effect
trueMarginal <- (beta1 + beta3) * Zprev + beta1 * (1 - Zprev)
## Model with the same structure as the true model
modelFull <- lm(Y ~ X + Z + X:Z, data = dat)
estimator1 <- (coef(modelFull)["X"] + coef(modelFull)["X:Z"]) * mean(dat$Z) +
coef(modelFull)["X"] * (1 - mean(dat$Z))
## Model omitting the interaction
modelNoInt <- lm(Y ~ X + Z, data = dat)
estimator2 <- coef(modelNoInt)["X"]
## Return
c(trueMarginal = trueMarginal, estimator1 = estimator1, estimator2 = estimator2)
}
## Iterator
Iterate <- function(R = 1000, ...) {
mclapply(seq_len(R), function(i) {Sim(...)}) %>%
do.call(rbind, .) %>%
colMeans
}
## Run with increasing n
system.time({
print(Iterate(R = 10^3, beta3 = 0.5))
print(Iterate(R = 10^3, beta3 = 1.0))
print(Iterate(R = 10^3, beta3 = 2.0))
})
## trueMarginal estimator1.X estimator2.X
## 1.150000 1.149650 1.149602
## trueMarginal estimator1.X estimator2.X
## 1.300000 1.299796 1.299863
## trueMarginal estimator1.X estimator2.X
## 1.600000 1.599738 1.599509
## user system elapsed
## 197.261 3.659 61.499