knitr::opts_chunk$set(echo = TRUE)
pacman::p_load(pacman, tidyverse, MatchIt, marginaleffects, boot)
set.seed(54321)
R <- 999
The data-generating code below is taken from here.
#Generating data similar to Austin (2009) for demonstrating treatment effect estimation
gen_X <- function(n) {
X <- matrix(rnorm(9 * n), nrow = n, ncol = 9)
X[,5] <- as.numeric(X[,5] < .5)
X
}
#~20% treated
gen_A <- function(X) {
LP_A <- - 1.2 + log(2)*X[,1] - log(1.5)*X[,2] + log(2)*X[,4] - log(2.4)*X[,5] + log(2)*X[,7] - log(1.5)*X[,8]
P_A <- plogis(LP_A)
rbinom(nrow(X), 1, P_A)
}
# Continuous outcome
gen_Y_C <- function(A, X) {
2*A + 2*X[,1] + 2*X[,2] + 2*X[,3] + 1*X[,4] + 2*X[,5] + 1*X[,6] + rnorm(length(A), 0, 5)
}
#Conditional:
# MD: 2
#Marginal:
# MD: 2
# Binary outcome
gen_Y_B <- function(A, X) {
LP_B <- -2 + log(2.4)*A + log(2)*X[,1] + log(2)*X[,2] + log(2)*X[,3] + log(1.5)*X[,4] + log(2.4)*X[,5] + log(1.5)*X[,6]
P_B <- plogis(LP_B)
rbinom(length(A), 1, P_B)
}
#Conditional:
# OR: 2.4
# logOR: .875
#Marginal:
# RD: .144
# RR: 1.54
# logRR: .433
# OR: 1.92
# logOR .655
# Survival outcome
gen_Y_S <- function(A, X) {
LP_S <- -2 + log(2.4)*A + log(2)*X[,1] + log(2)*X[,2] + log(2)*X[,3] + log(1.5)*X[,4] + log(2.4)*X[,5] + log(1.5)*X[,6]
sqrt(-log(runif(length(A)))*2e4*exp(-LP_S))
}
#Conditional:
# HR: 2.4
# logHR: .875
#Marginal:
# HR: 1.57
# logHR: .452
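As a quick sanity check, the marginal values above can be approximated by Monte Carlo: compute both potential-outcome probabilities for a large simulated sample and compare their means. The sketch below (not part of the original code) does this for the binary outcome; since it is not stated whether the listed marginal values refer to the whole population or to the treated, both versions are computed.
#Monte Carlo sketch of the marginal effects for the binary outcome (illustration only)
X_big <- gen_X(1e6)
A_big <- gen_A(X_big)
lp1 <- -2 + log(2.4) + log(2)*X_big[,1] + log(2)*X_big[,2] + log(2)*X_big[,3] +
  log(1.5)*X_big[,4] + log(2.4)*X_big[,5] + log(1.5)*X_big[,6]
lp0 <- lp1 - log(2.4)
marg <- function(p1, p0) {
  c(RD = mean(p1) - mean(p0),
    RR = mean(p1) / mean(p0),
    OR = (mean(p1) / (1 - mean(p1))) / (mean(p0) / (1 - mean(p0))))
}
marg(plogis(lp1), plogis(lp0))                         #whole population
marg(plogis(lp1)[A_big == 1], plogis(lp0)[A_big == 1]) #among the treated (ATT)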
set.seed(19599)
n <- 2000
X <- gen_X(n)
A <- gen_A(X)
Y_C <- gen_Y_C(A, X)
Y_B <- gen_Y_B(A, X)
Y_S <- gen_Y_S(A, X)
d <- data.frame(A, X, Y_C, Y_B, Y_S)
d %>% head(10)
## A X1 X2 X3 X4 X5 X6 X7
## 1 0 0.1725432 -1.4282682 -0.410253771 -2.36059391 1 -1.1198630 0.6397597
## 2 0 -1.0958972 0.8462679 0.245567087 -0.12332771 1 -2.2686599 -1.4490669
## 3 0 0.1767731 0.7904544 -0.843573431 0.82365740 1 -0.2220610 0.2970925
## 4 0 -0.4595239 0.1725907 1.954226555 -0.62661325 1 -0.4018875 -0.8294041
## 5 1 0.3563142 -1.8120759 0.813487217 -0.67188804 1 -0.8297074 1.7296758
## 6 0 -2.4312962 -1.7983652 -1.293970033 0.04609167 1 -1.2418889 -1.1252320
## 7 0 1.8402265 1.7601210 -1.074574959 -1.64281560 1 1.4482270 0.7130736
## 8 0 -1.0961862 0.9764295 0.008303417 -0.22780215 1 1.3220791 -0.3000502
## 9 0 0.7807950 1.3137166 0.658034166 0.85398072 1 0.9495019 -0.5730730
## 10 1 -0.5651300 -0.1053473 -0.136854125 1.62328415 1 -0.5303725 -0.3341707
## X8 X9 Y_C Y_B Y_S
## 1 -0.4840279 -0.59385184 0.07104125 0 278.45921
## 2 -0.5514441 -0.31439333 0.15618693 0 330.62919
## 3 -0.6966028 -0.69515666 -0.85179850 1 369.94056
## 4 -0.5383526 0.20729344 -2.35183911 0 91.06106
## 5 -0.6438651 -0.02647566 0.68057840 0 182.72605
## 6 -1.8658885 -0.56513308 -5.62259966 0 2563.73131
## 7 0.6971722 -0.94672971 4.28650506 1 97.49147
## 8 2.6149417 0.71075506 12.65406427 1 98.49721
## 9 -0.2362267 -0.14580271 15.89771407 1 67.53084
## 10 0.4183507 0.46308458 1.07888291 1 113.69659
Also adding an ordinal variable (quartiles of X1) for later use:
d <- d %>% mutate(X_ORDINAL = ntile(X1, 4))
mNN <- matchit(A ~ X1 + X2 + X3 + X4 + X5 +
X6 + X7 + X8 + X9, data = d)
md <- match.data(mNN)
mNN
## A matchit object
## - method: 1:1 nearest neighbor matching without replacement
## - distance: Propensity score
## - estimated with logistic regression
## - number of obs.: 2000 (original), 882 (matched)
## - target estimand: ATT
## - covariates: X1, X2, X3, X4, X5, X6, X7, X8, X9
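Before estimating effects, covariate balance in the matched sample can be checked with MatchIt's summary method:
#Balance statistics for the matched sample only
summary(mNN, un = FALSE)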
The cluster (pair) bootstrap below is taken from this article. Estimating the marginal risk ratio for the ATT:
#Unique pair IDs
pair_ids <- levels(md$subclass)
#Unit IDs, split by pair membership
split_inds <- split(seq_len(nrow(md)), md$subclass)
cluster_boot_fun <- function(pairs, i) {
#Extract units corresponding to selected pairs
ids <- unlist(split_inds[pairs[i]])
#Subset md with block bootstrapped indices
boot_md <- md[ids,]
#Fit outcome model
fit <- glm(Y_B ~ A * (X1 + X2 + X3 + X4 + X5 +
X6 + X7 + X8 + X9),
data = boot_md, weights = weights,
family = quasibinomial())
## G-computation ##
#Subset to treated units for ATT; skip for ATE
md1 <- subset(boot_md, A == 1)
#Estimated potential outcomes under treatment
p1 <- predict(fit, type = "response",
newdata = transform(md1, A = 1))
Ep1 <- weighted.mean(p1, md1$weights)
#Estimated potential outcomes under control
p0 <- predict(fit, type = "response",
newdata = transform(md1, A = 0))
Ep0 <- weighted.mean(p0, md1$weights)
#Risk ratio
return(Ep1 / Ep0)
}
cluster_boot_out <- boot(pair_ids, cluster_boot_fun,
R = R)
cluster_boot_out
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = pair_ids, statistic = cluster_boot_fun, R = R)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 1.58816 0.006980789 0.1293416
BCa confidence interval:
boot.ci(cluster_boot_out, type = "bca")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 999 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = cluster_boot_out, type = "bca")
##
## Intervals :
## Level BCa
## 95% ( 1.363, 1.884 )
## Calculations and Intervals on Original Scale
G-computation and bootstrap using marginaleffects::inferences()
Fitting the outcome model:
fit <- glm(Y_B ~ A * (X1 + X2 + X3 + X4 + X5 +
X6 + X7 + X8 + X9),
data = md, weights = weights,
family = quasibinomial())
G-computation, with the standard error obtained via the delta method and cluster-robust (by matched pair) variance:
avg_comparisons(fit,
variables = "A",
vcov = ~subclass,
newdata = subset(md, A == "1"),
wts = "weights",
comparison = "lnratioavg",
transform = exp
)
##
## Term Contrast Estimate Pr(>|z|) S 2.5 % 97.5 %
## A ln(mean(1) / mean(0)) 1.59 <0.001 28.4 1.36 1.85
##
## Columns: term, contrast, estimate, p.value, s.value, conf.low, conf.high, predicted, predicted_hi, predicted_lo
Trying the same with a bootstrap via inferences() and getting an error:
avg_comparisons(fit,
variables = "A",
vcov = ~subclass, # REDUNDANT! See Noah's explanation
newdata = subset(md, A == "1"),
wts = "weights",
comparison = "lnratioavg",
transform = exp #THROWS ERROR
) %>%
inferences(method = "boot", R = R, conf_type = "bca")
# Error: Assertion failed. One of the following must apply:
# * checkmate::check_choice(x): Must be element of set {'exp','ln'}, but is not atomic scalar
# * checkmate::check_function(x): Must be a function, not 'list'
Getting seemingly very different results when removing the offending lines; note that without transform = exp the estimate is reported on the log (risk ratio) scale:
avg_comparisons(fit,
variables = "A",
vcov = FALSE,
newdata = subset(md, A == "1"),
wts = "weights",
comparison = "lnratioavg"
) %>%
inferences(method = "boot", R = R, conf_type = "bca")
##
## Term Contrast Estimate Std. Error 2.5 % 97.5 %
## A ln(mean(1) / mean(0)) 0.463 0.0826 0.31 0.623
##
## Columns: term, contrast, estimate, predicted, predicted_hi, predicted_lo, std.error, conf.low, conf.high
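Exponentiating the log-scale estimate and interval limits recovers essentially the same risk ratio as before (a quick check using the printed values):
#Back-transform the log risk ratio and its CI from the output above
exp(c(estimate = 0.463, conf.low = 0.310, conf.high = 0.623))
#roughly 1.59 (1.36, 1.86), in line with the cluster-bootstrap and delta-method results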
Thank you, Noah.
Cluster (pair) bootstrap using marginaleffects::avg_comparisons() for G-computation, stratified by X_ORDINAL:
#Unique pair IDs
pair_ids <- levels(md$subclass)
#Unit IDs, split by pair membership
split_inds <- split(seq_len(nrow(md)), md$subclass)
cluster_boot_fun <- function(pairs, i) {
#Extract units corresponding to selected pairs
ids <- unlist(split_inds[pairs[i]])
#Subset md with block bootstrapped indices
boot_md <- md[ids,]
#Fit outcome model
fit <- glm(Y_B ~ A * (X1 + X2 + X3 + X4 + X5 +
X6 + X7 + X8 + X9),
data = boot_md, weights = weights,
family = quasibinomial())
## G-computation ##
est <- avg_comparisons(fit,
variables = "A",
vcov = FALSE,
newdata = subset(boot_md, A == "1"),
wts = "weights",
comparison = "ratioavg",
by = "X_ORDINAL")
#Return a named vector of stratum-specific risk ratios
setNames(est$estimate, paste(est$contrast, est$X_ORDINAL, sep = "|"))
}
cluster_boot_out <- boot(pair_ids, cluster_boot_fun,
R = R)
cluster_boot_out
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = pair_ids, statistic = cluster_boot_fun, R = R)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* 1.946937 -0.1306310 0.6397014
## t2* 1.400232 0.4611820 0.6400206
## t3* 2.637012 -0.6757808 0.6691783
## t4* 1.670915 0.5997303 0.8405886
Pay attention: with by = "X_ORDINAL" the bootstrap statistic is a vector of stratum-specific estimates, so to get boot.ci for a specific stratum use the index argument, for example index = 2 (the default is 1, see ?boot.ci).
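For example, the BCa interval for the second X_ORDINAL stratum:
#BCa CI for the second stratum-specific ratio (t2* in the output above)
boot.ci(cluster_boot_out, type = "bca", index = 2)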