library(grf)
library(haven)
library(dplyr)
library(ggplot2)
library(corrplot)
library(caret)
library(sandwich)
library(lmtest)
library(margins)
library(Hmisc)
library(tidyr)
# Data: Hidrobo et al. (2016) replication archive, Harvard Dataverse
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YBGPAP
# Place DV_baseline_followup.dta at Data-and-dofiles/Data/ relative to this file.
df_raw <- read_dta("Data-and-dofiles/Data/DV_baseline_followup.dta")
dim(df_raw)
## [1] 2357 229
The original do-file constructs treatment indicators, IPV outcomes, control variables, and sample eligibility flags. We replicate this step by step.
df <- df_raw
# --- Treatment indicators ---
df$p_treat <- as.numeric(df$bl_p_treat == 1)
df$Treatment <- as.numeric(df$bl_modalidad)
df$treat_food <- as.numeric(df$Treatment == 2)
df$treat_cash <- as.numeric(df$Treatment == 3)
df$treat_voucher <- as.numeric(df$Treatment == 4)
# --- Relationship indicator ---
df$bl_inrelat <- as.numeric(df$bl_b06 == 2 | df$bl_b06 == 3)
df$el_inrelat <- as.numeric(df$el_b06 == 2 | df$el_b06 == 3)
# --- Province ---
df$bl_carchi <- as.numeric(df$bl_a02 == 1)
# --- Endline IPV outcomes (last 6 months) ---
ipv_el_vars <- c("el_t10b","el_t11b","el_t12b","el_t13b","el_t14b",
"el_t15b","el_t16b","el_t17b","el_t18b","el_t19b",
"el_t20b","el_t21b","el_t22b","el_t23b",
"el_t24b","el_t25b","el_t26b","el_t27b","el_t28b")
ipv_el_life <- c("el_t10","el_t11","el_t12","el_t13","el_t14",
"el_t15","el_t16","el_t17","el_t18","el_t19",
"el_t20","el_t21","el_t22","el_t23",
"el_t24","el_t25","el_t26","el_t27","el_t28")
for (i in seq_along(ipv_el_vars)) {
bvar <- ipv_el_vars[i]
lvar <- ipv_el_life[i]
if (bvar %in% names(df) & lvar %in% names(df)) {
df[[bvar]] <- ifelse(is.na(df[[bvar]]) & df[[lvar]] == 0, 0, df[[bvar]])
df[[bvar]] <- as.numeric(df[[bvar]] == 1)
}
}
# Same for baseline
ipv_bl_vars <- c("bl_t10b","bl_t11b","bl_t12b","bl_t13b","bl_t14b",
"bl_t15b","bl_t16b","bl_t17b","bl_t18b","bl_t19b",
"bl_t20b","bl_t21b","bl_t22b","bl_t23b")
ipv_bl_life <- c("bl_t10","bl_t11","bl_t12","bl_t13","bl_t14",
"bl_t15","bl_t16","bl_t17","bl_t18","bl_t19",
"bl_t20","bl_t21","bl_t22","bl_t23")
for (i in seq_along(ipv_bl_vars)) {
bvar <- ipv_bl_vars[i]
lvar <- ipv_bl_life[i]
if (bvar %in% names(df) & lvar %in% names(df)) {
df[[bvar]] <- ifelse(is.na(df[[bvar]]) & df[[lvar]] == 0, 0, df[[bvar]])
df[[bvar]] <- as.numeric(df[[bvar]] == 1)
}
}
# --- Composite IPV outcomes ---
# Endline
df$el_control <- as.numeric(df$el_t10b == 1 | df$el_t11b == 1 |
df$el_t25b == 1 | df$el_t26b == 1)
df$el_emotional <- as.numeric(df$el_t12b == 1 | df$el_t13b == 1 | df$el_t14b == 1 |
df$el_t24b == 1 | df$el_t27b == 1 | df$el_t28b == 1)
df$el_physical <- as.numeric(df$el_t15b == 1 | df$el_t16b == 1 | df$el_t17b == 1 |
df$el_t18b == 1 | df$el_t19b == 1 | df$el_t20b == 1 |
df$el_t21b == 1 | df$el_t22b == 1 | df$el_t23b == 1)
# Baseline
df$bl_control <- as.numeric(df$bl_t10b == 1 | df$bl_t11b == 1)
df$bl_emotional <- as.numeric(df$bl_t12b == 1 | df$bl_t13b == 1 | df$bl_t14b == 1)
df$bl_physical <- as.numeric(df$bl_t15b == 1 | df$bl_t16b == 1 | df$bl_t17b == 1 |
df$bl_t18b == 1 | df$bl_t19b == 1 | df$bl_t20b == 1 |
df$bl_t21b == 1 | df$bl_t22b == 1 | df$bl_t23b == 1)
# Baseline lifetime violence
df$bl_lphysical <- as.numeric(df$bl_t15 == 1 | df$bl_t16 == 1 | df$bl_t17 == 1 |
df$bl_t18 == 1 | df$bl_t19 == 1 | df$bl_t20 == 1 |
df$bl_t21 == 1)
df$bl_lsexual <- as.numeric(df$bl_t22 == 1 | df$bl_t23 == 1)
df$bl_lphysex <- as.numeric(df$bl_lphysical == 1 | df$bl_lsexual == 1)
# --- Control variables ---
df$bl_edu_sec <- as.numeric(df$bl_b09 > 3)
df$bl_age <- as.numeric(df$bl_b04)
df$bl_colom <- as.numeric(df$bl_b08 == 2)
df$bl_married <- as.numeric(df$bl_b06 == 3)
df$bl_indig <- as.numeric(df$bl_b07 == 2)
df$bl_black <- as.numeric(df$bl_b07 == 3 | df$bl_b07 == 5)
df$bl_head <- as.numeric(df$bl_b03 == 1)
df$bl_dempl <- as.numeric(df$bl_d05 != 5)
df$ownhouse <- as.numeric(
(df$bl_e12 == 1 & df$bl_b03 == 1) | (df$bl_e12 == 2 & df$bl_b03 == 2)
)
# --- Partner variables ---
df$bl_partn_edu <- ifelse(df$bl_b03 == 2, df$bl_mhead_edu, df$bl_mspouse_edu)
df$bl_partn_sedu <- as.numeric(df$bl_partn_edu >= 4 & df$bl_partn_edu < 9)
df$bl_partn_age <- ifelse(df$bl_b03 == 2, df$bl_mhead_age, df$bl_mspouse_age)
# --- Household controls ---
df$bl_nchildren0t5 <- as.numeric(df$bl_nchildren0t5)
df$bl_nchildren6t14 <- as.numeric(df$bl_nchildren6t14)
df$bl_asset_cat <- as.numeric(cut(df$bl_asset_index,
breaks = quantile(df$bl_asset_index, probs = 0:4/4, na.rm = TRUE),
include.lowest = TRUE, labels = 1:4))
df$wealth2 <- as.numeric(df$bl_asset_cat == 2)
df$wealth3 <- as.numeric(df$bl_asset_cat == 3)
df$wealth4 <- as.numeric(df$bl_asset_cat == 4)
df$norooms <- as.numeric(df$bl_e06 == 0)
# --- Consumption ---
df$bl_totcons_pc <- as.numeric(df$bl_fneh_totcons_pc) * (365/12)
df$bl_nfcons_pc <- as.numeric(df$bl_nfcons_pc) * (365/12)
# --- Eligibility & analysis sample ---
df$eligible <- as.numeric(
df$bl_inrelat == 1 &
as.numeric(df$bl_b04) < 70 &
df$bl_tnohaymujer == 1 &
(df$bl_b03 == 1 | df$bl_b03 == 2)
)
df$foranalysis <- as.numeric(
df$bl_tb == 1 & df$el_tb == 1 & df$diffid == 0 & df$eligible == 1
)
cat("Full dataset:", nrow(df), "observations\n")
## Full dataset: 2357 observations
cat("Analysis sample (foranalysis == 1):", sum(df$foranalysis == 1, na.rm = TRUE), "observations\n")
## Analysis sample (foranalysis == 1): 1226 observations
dfa <- df %>% filter(foranalysis == 1)
cat("Analysis sample:", nrow(dfa), "observations\n")
## Analysis sample: 1226 observations
balance_vars <- c("bl_head", "bl_colom", "bl_age", "bl_edu_sec", "bl_married",
"bl_indig", "bl_black", "bl_dempl", "ownhouse",
"bl_partn_sedu", "bl_partn_age",
"bl_nchildren0t5", "bl_nchildren6t14", "bl_totcons_pc", "norooms",
"bl_lphysex", "bl_control", "bl_emotional", "bl_physical", "bl_asset_index", "bl_carchi")
balance_results <- data.frame(
Variable = balance_vars,
Mean_Control = NA, Mean_Treatment = NA, P_value = NA
)
for (i in seq_along(balance_vars)) {
v <- balance_vars[i]
if (v %in% names(dfa)) {
ctrl <- dfa[[v]][dfa$p_treat == 0]
trt <- dfa[[v]][dfa$p_treat == 1]
balance_results$Mean_Control[i] <- round(mean(ctrl, na.rm = TRUE), 3)
balance_results$Mean_Treatment[i] <- round(mean(trt, na.rm = TRUE), 3)
tt <- try(t.test(ctrl, trt), silent = TRUE)
if (!inherits(tt, "try-error")) {
balance_results$P_value[i] <- round(tt$p.value, 3)
}
}
}
print(balance_results)
## Variable Mean_Control Mean_Treatment P_value
## 1 bl_head 0.032 0.018 0.191
## 2 bl_colom 0.394 0.331 0.042
## 3 bl_age 35.293 34.664 0.409
## 4 bl_edu_sec 0.380 0.387 0.812
## 5 bl_married 0.417 0.426 0.792
## 6 bl_indig 0.035 0.043 0.488
## 7 bl_black 0.061 0.070 0.540
## 8 bl_dempl 0.307 0.326 0.530
## 9 ownhouse 0.041 0.050 0.469
## 10 bl_partn_sedu 0.357 0.393 0.237
## 11 bl_partn_age 39.203 38.390 0.319
## 12 bl_nchildren0t5 0.722 0.755 0.523
## 13 bl_nchildren6t14 1.023 0.873 0.040
## 14 bl_totcons_pc 107.324 106.311 0.821
## 15 norooms 0.128 0.153 0.237
## 16 bl_lphysex 0.333 0.351 0.563
## 17 bl_control 0.171 0.167 0.862
## 18 bl_emotional 0.243 0.267 0.398
## 19 bl_physical 0.125 0.176 0.020
## 20 bl_asset_index 0.685 0.338 0.004
## 21 bl_carchi 0.330 0.397 0.027
We replicate the probit regressions from Table 2 using LPM (linear probability models) as in the robustness appendix, since marginal effects from probit in R give equivalent results.
controls_formula <- "bl_head + bl_colom + bl_married + bl_indig + bl_black +
bl_age + bl_edu_sec + bl_dempl + ownhouse +
bl_partn_sedu + bl_partn_age +
bl_nchildren0t5 + bl_nchildren6t14 +
wealth2 + wealth3 + wealth4 + bl_carchi"
outcomes <- c("el_control", "el_emotional", "el_physical")
bl_outcomes <- c("bl_control", "bl_emotional", "bl_physical")
cat("=== Table 2: Impact of Pooled Treatment on IPV (LPM) ===\n\n")
## === Table 2: Impact of Pooled Treatment on IPV (LPM) ===
for (i in 1:3) {
outcome <- outcomes[i]
bl_outcome <- bl_outcomes[i]
cat("--- Outcome:", outcome, "---\n")
ctrl_mean <- mean(dfa[[outcome]][dfa$p_treat == 0], na.rm = TRUE)
cat("Endline control mean:", round(ctrl_mean, 3), "\n")
f1 <- as.formula(paste(outcome, "~ p_treat"))
m1 <- lm(f1, data = dfa)
f2 <- as.formula(paste(outcome, "~ p_treat +", bl_outcome, "+ bl_carchi"))
m2 <- lm(f2, data = dfa)
f3 <- as.formula(paste(outcome, "~ p_treat +", bl_outcome, "+", controls_formula))
m3 <- lm(f3, data = dfa)
se1 <- sqrt(diag(vcovCL(m1, cluster = dfa$bl_a05)))
se2 <- sqrt(diag(vcovCL(m2, cluster = dfa$bl_a05)))
se3 <- sqrt(diag(vcovCL(m3, cluster = dfa$bl_a05)))
cat(" No controls: coef =", round(coef(m1)["p_treat"], 3),
" SE =", round(se1["p_treat"], 3), "\n")
cat(" Basic controls: coef =", round(coef(m2)["p_treat"], 3),
" SE =", round(se2["p_treat"], 3), "\n")
cat(" Extended controls: coef =", round(coef(m3)["p_treat"], 3),
" SE =", round(se3["p_treat"], 3), "\n\n")
}
## --- Outcome: el_control ---
## Endline control mean: 0.362
## No controls: coef = -0.072 SE = 0.034
## Basic controls: coef = -0.066 SE = 0.033
## Extended controls: coef = -0.068 SE = 0.034
##
## --- Outcome: el_emotional ---
## Endline control mean: 0.351
## No controls: coef = -0.04 SE = 0.036
## Basic controls: coef = -0.043 SE = 0.034
## Extended controls: coef = -0.04 SE = 0.034
##
## --- Outcome: el_physical ---
## Endline control mean: 0.197
## No controls: coef = -0.044 SE = 0.035
## Basic controls: coef = -0.063 SE = 0.033
## Extended controls: coef = -0.06 SE = 0.033
cf_vars <- c("bl_head", "bl_colom", "bl_age", "bl_edu_sec", "bl_married",
"bl_indig", "bl_black", "bl_dempl", "ownhouse",
"bl_partn_sedu", "bl_partn_age",
"bl_nchildren0t5", "bl_nchildren6t14",
"bl_asset_index", "norooms", "bl_carchi",
"bl_physical", "bl_emotional", "bl_control")
cf_data <- dfa %>%
select(all_of(c("el_physical", "el_emotional", "el_control",
"p_treat", "bl_a05", cf_vars))) %>%
na.omit()
# Policy-relevant covariates: exclude cross-outcome baseline IPV variables
# (e.g., baseline emotional/controlling abuse is not actionable for targeting physical violence)
cf_vars_policy_phys <- setdiff(cf_vars, c("bl_emotional", "bl_control"))
cf_vars_policy_ctrl <- setdiff(cf_vars, c("bl_physical", "bl_emotional"))
cf_vars_policy_emot <- setdiff(cf_vars, c("bl_physical", "bl_control"))
# Purely observable covariates: exclude ALL baseline IPV variables
# (policymakers would not have access to baseline IPV survey responses)
cf_vars_policy_obs <- setdiff(cf_vars, c("bl_physical", "bl_emotional", "bl_control"))
cat("Observations for causal forest:", nrow(cf_data), "\n")
## Observations for causal forest: 1226
cat("Spec A covariates:", length(cf_vars), "\n")
## Spec A covariates: 19
cat("Spec C covariates (purely observable):", length(cf_vars_policy_obs), "\n")
## Spec C covariates (purely observable): 16
corrplot(cor(cf_data %>% select(all_of(cf_vars)), use = "complete.obs"),
type = "upper", tl.col = "black", tl.cex = 0.8)
With only ~1,100 observations, a classic 70/30 train/test split wastes valuable data. We use 5-fold cross-validation to generate out-of-fold CATE predictions while maximizing the training set size in each fold. The full data is used for point estimation (ATE, BLP, calibration), while k-fold out-of-fold predictions are used for evaluation (CATE distribution, quartile analysis).
K <- 5
set.seed(1234)
folds <- createFolds(cf_data$el_physical, k = K, list = TRUE)
# Full data for estimation
X <- cf_data %>% select(all_of(cf_vars))
Y <- cf_data$el_physical
W <- cf_data$p_treat
Y_ctrl <- cf_data$el_control
Y_emot <- cf_data$el_emotional
n <- nrow(cf_data)
cat("Full dataset:", n, "observations\n")
## Full dataset: 1226 observations
cat("K-fold CV with K =", K, "\n")
## K-fold CV with K = 5
cat("Fold sizes:", sapply(folds, length), "\n")
## Fold sizes: 245 245 245 246 245
Since this is an RCT, treatment assignment should be independent of covariates. We verify this by fitting a regression forest to predict W from X. If randomization worked, no covariate should have high predictive importance, and estimated propensity scores should be approximately constant.
set.seed(100241)
# Fit regression forest to predict treatment assignment
rf_W <- regression_forest(X, W, tune.parameters = "all")
W_hat <- predict(rf_W)$predictions
# Variable importance for treatment prediction
W_varimp <- variable_importance(rf_W)
W_vi_df <- data.frame(variable = names(X), importance = as.numeric(W_varimp))
W_vi_df <- W_vi_df %>% arrange(desc(importance))
cat("Propensity score summary (should be ~constant around", round(mean(W), 3), "):\n")
## Propensity score summary (should be ~constant around 0.719 ):
summary(W_hat)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6114 0.6850 0.7161 0.7185 0.7536 0.8387
cat("\nSD of estimated propensity scores:", round(sd(W_hat), 4))
##
## SD of estimated propensity scores: 0.0452
cat("\nActual treatment share:", round(mean(W), 3), "\n")
##
## Actual treatment share: 0.719
# Plot
ggplot(W_vi_df, aes(x = reorder(variable, importance), y = importance)) +
geom_col(fill = "coral", alpha = 0.8) +
coord_flip() +
labs(title = "Variable Importance for Predicting Treatment Assignment",
subtitle = "Randomization check: importance should be uniformly low",
x = "", y = "Importance") +
theme_minimal()
The propensity scores should cluster tightly around the actual treatment share with low variance. The variable importance bars should be roughly flat — if any covariate strongly predicted treatment, it would raise concerns about the randomization. This check complements the balance test from Part 2.
We fit regression forests to predict each IPV outcome from the covariates. This serves two purposes: (1) generating outcome predictions (Y.hat) for orthogonalization in the causal forest, and (2) identifying which covariates are strong predictors of the outcome level — these are the most promising candidates for detecting heterogeneity.
set.seed(100241)
# Outcome vectors Y_ctrl, Y_emot already defined in cv-setup (full data)
# Physical/Sexual Violence
rf_Y_phys <- regression_forest(X, Y, tune.parameters = "all")
Y_hat_phys <- predict(rf_Y_phys)$predictions
Y_vi_phys <- variable_importance(rf_Y_phys)
# Controlling Behaviors
rf_Y_ctrl <- regression_forest(X, Y_ctrl, tune.parameters = "all")
Y_hat_ctrl <- predict(rf_Y_ctrl)$predictions
Y_vi_ctrl <- variable_importance(rf_Y_ctrl)
# Emotional Violence
rf_Y_emot <- regression_forest(X, Y_emot, tune.parameters = "all")
Y_hat_emot <- predict(rf_Y_emot)$predictions
Y_vi_emot <- variable_importance(rf_Y_emot)
vi_outcome_df <- data.frame(
variable = names(X),
Physical = round(as.numeric(Y_vi_phys), 4),
Controlling = round(as.numeric(Y_vi_ctrl), 4),
Emotional = round(as.numeric(Y_vi_emot), 4)
)
vi_outcome_df <- vi_outcome_df %>% arrange(desc(Physical))
knitr::kable(vi_outcome_df, caption = "Outcome Regression Forest: Variable Importance for Predicting IPV Levels")
| variable | Physical | Controlling | Emotional |
|---|---|---|---|
| bl_physical | 0.4980 | 0.1493 | 0.1989 |
| bl_control | 0.1432 | 0.3389 | 0.1726 |
| bl_emotional | 0.0894 | 0.2445 | 0.4032 |
| bl_asset_index | 0.0805 | 0.0624 | 0.0617 |
| bl_partn_age | 0.0486 | 0.0423 | 0.0355 |
| bl_age | 0.0484 | 0.0780 | 0.0328 |
| norooms | 0.0149 | 0.0045 | 0.0031 |
| bl_married | 0.0143 | 0.0163 | 0.0178 |
| bl_nchildren6t14 | 0.0131 | 0.0120 | 0.0132 |
| bl_nchildren0t5 | 0.0127 | 0.0122 | 0.0164 |
| bl_partn_sedu | 0.0079 | 0.0047 | 0.0059 |
| bl_dempl | 0.0071 | 0.0076 | 0.0040 |
| bl_carchi | 0.0070 | 0.0078 | 0.0149 |
| bl_edu_sec | 0.0059 | 0.0060 | 0.0072 |
| bl_colom | 0.0050 | 0.0051 | 0.0053 |
| bl_black | 0.0017 | 0.0024 | 0.0027 |
| ownhouse | 0.0015 | 0.0025 | 0.0028 |
| bl_indig | 0.0006 | 0.0023 | 0.0018 |
| bl_head | 0.0002 | 0.0011 | 0.0004 |
# Athey/Wager threshold: keep variables with importance > 0.2 * mean
threshold <- 0.2
selected_phys <- which(Y_vi_phys / mean(Y_vi_phys) > threshold)
selected_ctrl <- which(Y_vi_ctrl / mean(Y_vi_ctrl) > threshold)
selected_emot <- which(Y_vi_emot / mean(Y_vi_emot) > threshold)
cat("Selected variables for Physical/Sexual Violence (", length(selected_phys), "of", ncol(X), "):\n")
## Selected variables for Physical/Sexual Violence ( 10 of 19 ):
print(names(X)[selected_phys])
## [1] "bl_age" "bl_married" "bl_partn_age" "bl_nchildren0t5"
## [5] "bl_nchildren6t14" "bl_asset_index" "norooms" "bl_physical"
## [9] "bl_emotional" "bl_control"
cat("\nSelected variables for Controlling Behaviors (", length(selected_ctrl), "of", ncol(X), "):\n")
##
## Selected variables for Controlling Behaviors ( 9 of 19 ):
print(names(X)[selected_ctrl])
## [1] "bl_age" "bl_married" "bl_partn_age" "bl_nchildren0t5"
## [5] "bl_nchildren6t14" "bl_asset_index" "bl_physical" "bl_emotional"
## [9] "bl_control"
cat("\nSelected variables for Emotional Violence (", length(selected_emot), "of", ncol(X), "):\n")
##
## Selected variables for Emotional Violence ( 10 of 19 ):
print(names(X)[selected_emot])
## [1] "bl_age" "bl_married" "bl_partn_age" "bl_nchildren0t5"
## [5] "bl_nchildren6t14" "bl_asset_index" "bl_carchi" "bl_physical"
## [9] "bl_emotional" "bl_control"
For policy purposes, we create an alternative specification that excludes cross-outcome baseline IPV variables. When targeting a program to reduce physical violence, a policymaker would not typically know a woman’s baseline emotional or controlling abuse status — these are measured through confidential surveys. Specification B uses only covariates that would plausibly be available for targeting decisions.
# Policy-relevant covariate matrices
X_pol_phys <- X[, cf_vars_policy_phys]
X_pol_ctrl <- X[, cf_vars_policy_ctrl]
X_pol_emot <- X[, cf_vars_policy_emot]
# Specification C: purely observable covariates (same for all outcomes)
X_pol_obs <- X[, cf_vars_policy_obs]
# Outcome regression forests with policy-relevant covariates
rf_Y_phys_pol <- regression_forest(X_pol_phys, Y, tune.parameters = "all")
Y_hat_phys_pol <- predict(rf_Y_phys_pol)$predictions
Y_vi_phys_pol <- variable_importance(rf_Y_phys_pol)
rf_Y_ctrl_pol <- regression_forest(X_pol_ctrl, Y_ctrl, tune.parameters = "all")
Y_hat_ctrl_pol <- predict(rf_Y_ctrl_pol)$predictions
Y_vi_ctrl_pol <- variable_importance(rf_Y_ctrl_pol)
rf_Y_emot_pol <- regression_forest(X_pol_emot, Y_emot, tune.parameters = "all")
Y_hat_emot_pol <- predict(rf_Y_emot_pol)$predictions
Y_vi_emot_pol <- variable_importance(rf_Y_emot_pol)
# Variable selection for policy specification
selected_phys_pol <- which(Y_vi_phys_pol / mean(Y_vi_phys_pol) > threshold)
selected_ctrl_pol <- which(Y_vi_ctrl_pol / mean(Y_vi_ctrl_pol) > threshold)
selected_emot_pol <- which(Y_vi_emot_pol / mean(Y_vi_emot_pol) > threshold)
cat("=== Policy-Relevant Variable Selection (Specification B) ===")
## === Policy-Relevant Variable Selection (Specification B) ===
cat("\nPhysical/Sexual (", length(selected_phys_pol), "of", ncol(X_pol_phys), "):\n")
##
## Physical/Sexual ( 8 of 17 ):
print(names(X_pol_phys)[selected_phys_pol])
## [1] "bl_age" "bl_married" "bl_partn_age" "bl_nchildren0t5"
## [5] "bl_nchildren6t14" "bl_asset_index" "norooms" "bl_physical"
cat("\nControlling (", length(selected_ctrl_pol), "of", ncol(X_pol_ctrl), "):\n")
##
## Controlling ( 12 of 17 ):
print(names(X_pol_ctrl)[selected_ctrl_pol])
## [1] "bl_colom" "bl_age" "bl_edu_sec" "bl_married"
## [5] "bl_dempl" "bl_partn_age" "bl_nchildren0t5" "bl_nchildren6t14"
## [9] "bl_asset_index" "norooms" "bl_carchi" "bl_control"
cat("\nEmotional (", length(selected_emot_pol), "of", ncol(X_pol_emot), "):\n")
##
## Emotional ( 8 of 17 ):
print(names(X_pol_emot)[selected_emot_pol])
## [1] "bl_age" "bl_married" "bl_partn_age" "bl_nchildren0t5"
## [5] "bl_nchildren6t14" "bl_asset_index" "bl_carchi" "bl_emotional"
Specification C excludes all baseline IPV variables (physical, emotional, controlling). This represents a targeting scenario where policymakers only have access to observable demographic and socioeconomic characteristics — no confidential IPV survey data at all.
# Outcome regression forests with purely observable covariates
rf_Y_phys_obs <- regression_forest(X_pol_obs, Y, tune.parameters = "all")
Y_hat_phys_obs <- predict(rf_Y_phys_obs)$predictions
Y_vi_phys_obs <- variable_importance(rf_Y_phys_obs)
rf_Y_ctrl_obs <- regression_forest(X_pol_obs, Y_ctrl, tune.parameters = "all")
Y_hat_ctrl_obs <- predict(rf_Y_ctrl_obs)$predictions
Y_vi_ctrl_obs <- variable_importance(rf_Y_ctrl_obs)
rf_Y_emot_obs <- regression_forest(X_pol_obs, Y_emot, tune.parameters = "all")
Y_hat_emot_obs <- predict(rf_Y_emot_obs)$predictions
Y_vi_emot_obs <- variable_importance(rf_Y_emot_obs)
# Variable selection for purely observable specification
selected_phys_obs <- which(Y_vi_phys_obs / mean(Y_vi_phys_obs) > threshold)
selected_ctrl_obs <- which(Y_vi_ctrl_obs / mean(Y_vi_ctrl_obs) > threshold)
selected_emot_obs <- which(Y_vi_emot_obs / mean(Y_vi_emot_obs) > threshold)
cat("=== Purely Observable Variable Selection (Specification C) ===")
## === Purely Observable Variable Selection (Specification C) ===
cat("\nPhysical/Sexual (", length(selected_phys_obs), "of", ncol(X_pol_obs), "):\n")
##
## Physical/Sexual ( 11 of 16 ):
print(names(X_pol_obs)[selected_phys_obs])
## [1] "bl_colom" "bl_age" "bl_married" "bl_black"
## [5] "bl_dempl" "bl_partn_sedu" "bl_partn_age" "bl_nchildren0t5"
## [9] "bl_nchildren6t14" "bl_asset_index" "norooms"
cat("\nControlling (", length(selected_ctrl_obs), "of", ncol(X_pol_obs), "):\n")
##
## Controlling ( 16 of 16 ):
print(names(X_pol_obs)[selected_ctrl_obs])
## [1] "bl_head" "bl_colom" "bl_age" "bl_edu_sec"
## [5] "bl_married" "bl_indig" "bl_black" "bl_dempl"
## [9] "ownhouse" "bl_partn_sedu" "bl_partn_age" "bl_nchildren0t5"
## [13] "bl_nchildren6t14" "bl_asset_index" "norooms" "bl_carchi"
cat("\nEmotional (", length(selected_emot_obs), "of", ncol(X_pol_obs), "):\n")
##
## Emotional ( 12 of 16 ):
print(names(X_pol_obs)[selected_emot_obs])
## [1] "bl_colom" "bl_age" "bl_edu_sec" "bl_married"
## [5] "bl_dempl" "bl_partn_sedu" "bl_partn_age" "bl_nchildren0t5"
## [9] "bl_nchildren6t14" "bl_asset_index" "norooms" "bl_carchi"
vi_long <- vi_outcome_df %>%
pivot_longer(cols = c(Physical, Controlling, Emotional),
names_to = "Outcome", values_to = "Importance")
ggplot(vi_long, aes(x = reorder(variable, Importance), y = Importance, fill = Outcome)) +
geom_col(position = "dodge", alpha = 0.8) +
coord_flip() +
labs(title = "Outcome Regression Forest: Variable Importance Across IPV Outcomes",
subtitle = "Variables above the selection threshold are retained for the causal forest",
x = "", y = "Importance") +
theme_minimal() +
scale_fill_brewer(palette = "Set2")
vi_phys_pol <- data.frame(
variable = names(X_pol_phys),
Importance = as.numeric(Y_vi_phys_pol),
Outcome = "Physical"
)
vi_ctrl_pol <- data.frame(
variable = names(X_pol_ctrl),
Importance = as.numeric(Y_vi_ctrl_pol),
Outcome = "Controlling"
)
vi_emot_pol <- data.frame(
variable = names(X_pol_emot),
Importance = as.numeric(Y_vi_emot_pol),
Outcome = "Emotional"
)
vi_long_pol <- bind_rows(vi_phys_pol, vi_ctrl_pol, vi_emot_pol)
# Plot
ggplot(vi_long_pol,
aes(x = reorder(variable, Importance), y = Importance, fill = Outcome)) +
geom_col(position = "dodge", alpha = 0.8) +
coord_flip() +
labs(title = "Outcome Regression Forest: Variable Importance Across IPV Outcomes",
subtitle = "Policy-Relevant Covariates (Specification B)",
x = "", y = "Importance") +
theme_minimal() +
scale_fill_brewer(palette = "Set2")
vi_outcome_df_obs <- data.frame(
variable = names(X_pol_obs),
Physical = round(as.numeric(Y_vi_phys_obs), 4),
Controlling = round(as.numeric(Y_vi_ctrl_obs), 4),
Emotional = round(as.numeric(Y_vi_emot_obs), 4)
)
vi_outcome_df_obs <- vi_outcome_df_obs %>% arrange(desc(Physical))
knitr::kable(vi_outcome_df_obs, caption = "Outcome Regression Forest: Variable Importance — Purely Observable Covariates (Specification C)")
| variable | Physical | Controlling | Emotional |
|---|---|---|---|
| bl_asset_index | 0.2745 | 0.0896 | 0.1058 |
| bl_age | 0.2264 | 0.1131 | 0.1074 |
| bl_partn_age | 0.2046 | 0.1060 | 0.1001 |
| bl_nchildren0t5 | 0.0609 | 0.0738 | 0.1015 |
| bl_nchildren6t14 | 0.0510 | 0.0714 | 0.0804 |
| bl_married | 0.0454 | 0.0785 | 0.0914 |
| norooms | 0.0434 | 0.0532 | 0.0609 |
| bl_black | 0.0188 | 0.0384 | 0.0016 |
| bl_partn_sedu | 0.0178 | 0.0519 | 0.0686 |
| bl_dempl | 0.0150 | 0.0611 | 0.0652 |
| bl_colom | 0.0126 | 0.0517 | 0.0624 |
| bl_carchi | 0.0118 | 0.0590 | 0.0860 |
| bl_edu_sec | 0.0102 | 0.0539 | 0.0671 |
| ownhouse | 0.0048 | 0.0349 | 0.0006 |
| bl_indig | 0.0020 | 0.0379 | 0.0007 |
| bl_head | 0.0008 | 0.0258 | 0.0002 |
vi_long_obs <- vi_outcome_df_obs %>%
pivot_longer(cols = c(Physical, Controlling, Emotional),
names_to = "Outcome", values_to = "Importance")
ggplot(vi_long_obs,
aes(x = reorder(variable, Importance), y = Importance, fill = Outcome)) +
geom_col(position = "dodge", alpha = 0.8) +
coord_flip() +
labs(title = "Outcome Regression Forest: Variable Importance Across IPV Outcomes",
subtitle = "Purely Observable Covariates (Specification C)",
x = "", y = "Importance") +
theme_minimal() +
scale_fill_brewer(palette = "Set2")
The variable importance from the outcome regression forests tells us which covariates are strong predictors of IPV levels. Variables that survive the selection threshold are kept in the causal forest. Note that predicting outcome levels well is distinct from driving heterogeneity in treatment effects — but following Athey & Wager, pre-selecting on outcome prediction is a principled way to reduce noise in the causal forest when the covariate space is large relative to the sample size.
We estimate causal forests in three specifications:
set.seed(100241)
# --- Specification A: All selected covariates ---
cf <- causal_forest(X[, selected_phys], Y, W,
W.hat = W_hat, Y.hat = Y_hat_phys,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_all <- average_treatment_effect(cf, target.sample = "all")
att <- average_treatment_effect(cf, target.sample = "treated")
cf_ctrl <- causal_forest(X[, selected_ctrl], Y_ctrl, W,
W.hat = W_hat, Y.hat = Y_hat_ctrl,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_ctrl <- average_treatment_effect(cf_ctrl, target.sample = "all")
cf_emot <- causal_forest(X[, selected_emot], Y_emot, W,
W.hat = W_hat, Y.hat = Y_hat_emot,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_emot <- average_treatment_effect(cf_emot, target.sample = "all")
# --- Specification B: Policy-relevant covariates ---
cf_pol <- causal_forest(X_pol_phys[, selected_phys_pol], Y, W,
W.hat = W_hat, Y.hat = Y_hat_phys_pol,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_all_pol <- average_treatment_effect(cf_pol, target.sample = "all")
cf_ctrl_pol <- causal_forest(X_pol_ctrl[, selected_ctrl_pol], Y_ctrl, W,
W.hat = W_hat, Y.hat = Y_hat_ctrl_pol,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_ctrl_pol <- average_treatment_effect(cf_ctrl_pol, target.sample = "all")
cf_emot_pol <- causal_forest(X_pol_emot[, selected_emot_pol], Y_emot, W,
W.hat = W_hat, Y.hat = Y_hat_emot_pol,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_emot_pol <- average_treatment_effect(cf_emot_pol, target.sample = "all")
# --- Specification C: Purely observable covariates ---
cf_phys_obs <- causal_forest(X_pol_obs[, selected_phys_obs], Y, W,
W.hat = W_hat, Y.hat = Y_hat_phys_obs,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_all_obs <- average_treatment_effect(cf_phys_obs, target.sample = "all")
att_obs <- average_treatment_effect(cf_phys_obs, target.sample = "treated")
cf_ctrl_obs <- causal_forest(X_pol_obs[, selected_ctrl_obs], Y_ctrl, W,
W.hat = W_hat, Y.hat = Y_hat_ctrl_obs,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_ctrl_obs <- average_treatment_effect(cf_ctrl_obs, target.sample = "all")
cf_emot_obs <- causal_forest(X_pol_obs[, selected_emot_obs], Y_emot, W,
W.hat = W_hat, Y.hat = Y_hat_emot_obs,
num.trees = 4000, tune.parameters = "all",
compute.oob.predictions = TRUE)
ate_emot_obs <- average_treatment_effect(cf_emot_obs, target.sample = "all")
cat("=== Specification A (All Selected Covariates) ===\n")
## === Specification A (All Selected Covariates) ===
cat("Physical/Sexual: ATE =", round(ate_all[1], 4), " SE =", round(ate_all[2], 4), "\n")
## Physical/Sexual: ATE = -0.0577 SE = 0.0239
cat(" ATT =", round(att[1], 4), " SE =", round(att[2], 4), "\n")
## ATT = -0.0587 SE = 0.0243
cat("Controlling: ATE =", round(ate_ctrl[1], 4), " SE =", round(ate_ctrl[2], 4), "\n")
## Controlling: ATE = -0.0697 SE = 0.0293
cat("Emotional: ATE =", round(ate_emot[1], 4), " SE =", round(ate_emot[2], 4), "\n")
## Emotional: ATE = -0.0464 SE = 0.0287
cat("\n=== Specification B (Policy-Relevant) ===\n")
##
## === Specification B (Policy-Relevant) ===
cat("Physical/Sexual: ATE =", round(ate_all_pol[1], 4), " SE =", round(ate_all_pol[2], 4), "\n")
## Physical/Sexual: ATE = -0.0603 SE = 0.024
cat("Controlling: ATE =", round(ate_ctrl_pol[1], 4), " SE =", round(ate_ctrl_pol[2], 4), "\n")
## Controlling: ATE = -0.0621 SE = 0.0294
cat("Emotional: ATE =", round(ate_emot_pol[1], 4), " SE =", round(ate_emot_pol[2], 4), "\n")
## Emotional: ATE = -0.0485 SE = 0.029
cat("\n=== Specification C (Purely Observable) ===\n")
##
## === Specification C (Purely Observable) ===
cat("Physical/Sexual: ATE =", round(ate_all_obs[1], 4), " SE =", round(ate_all_obs[2], 4), "\n")
## Physical/Sexual: ATE = -0.0487 SE = 0.0252
cat(" ATT =", round(att_obs[1], 4), " SE =", round(att_obs[2], 4), "\n")
## ATT = -0.0492 SE = 0.0256
cat("Controlling: ATE =", round(ate_ctrl_obs[1], 4), " SE =", round(ate_ctrl_obs[2], 4), "\n")
## Controlling: ATE = -0.0694 SE = 0.0302
cat("Emotional: ATE =", round(ate_emot_obs[1], 4), " SE =", round(ate_emot_obs[2], 4), "\n")
## Emotional: ATE = -0.0419 SE = 0.0303
We generate out-of-fold CATE predictions using 5-fold CV. In each fold, we fit causal forests on the training folds and predict on the held-out fold, ensuring every observation has a truly out-of-sample CATE prediction for downstream evaluation.
set.seed(100241)
oof_cate_phys <- rep(NA, n)
oof_cate_ctrl <- rep(NA, n)
oof_cate_emot <- rep(NA, n)
oof_cate_phys_pol <- rep(NA, n)
oof_cate_ctrl_pol <- rep(NA, n)
oof_cate_emot_pol <- rep(NA, n)
oof_cate_phys_obs <- rep(NA, n)
oof_cate_ctrl_obs <- rep(NA, n)
oof_cate_emot_obs <- rep(NA, n)
for (k in 1:K) {
val_idx <- folds[[k]]
train_idx <- setdiff(1:n, val_idx)
X_tr <- X[train_idx, ]; X_va <- X[val_idx, ]
Y_tr <- Y[train_idx]; W_tr <- W[train_idx]
Y_ctrl_tr <- Y_ctrl[train_idx]; Y_emot_tr <- Y_emot[train_idx]
# Use full-data OOB nuisance estimates (valid since OOB is cross-validated)
W_hat_tr <- W_hat[train_idx]
Y_hat_phys_tr <- Y_hat_phys[train_idx]
Y_hat_ctrl_tr <- Y_hat_ctrl[train_idx]
Y_hat_emot_tr <- Y_hat_emot[train_idx]
# Spec A causal forests
cf_k <- causal_forest(X_tr[, selected_phys], Y_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_phys_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_phys[val_idx] <- predict(cf_k, X_va[, selected_phys])$predictions
cf_k_c <- causal_forest(X_tr[, selected_ctrl], Y_ctrl_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_ctrl_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_ctrl[val_idx] <- predict(cf_k_c, X_va[, selected_ctrl])$predictions
cf_k_e <- causal_forest(X_tr[, selected_emot], Y_emot_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_emot_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_emot[val_idx] <- predict(cf_k_e, X_va[, selected_emot])$predictions
# Spec B: policy-relevant
X_pol_phys_tr <- X_tr[, cf_vars_policy_phys]; X_pol_phys_va <- X_va[, cf_vars_policy_phys]
X_pol_ctrl_tr <- X_tr[, cf_vars_policy_ctrl]; X_pol_ctrl_va <- X_va[, cf_vars_policy_ctrl]
X_pol_emot_tr <- X_tr[, cf_vars_policy_emot]; X_pol_emot_va <- X_va[, cf_vars_policy_emot]
Y_hat_phys_pol_tr <- Y_hat_phys_pol[train_idx]
Y_hat_ctrl_pol_tr <- Y_hat_ctrl_pol[train_idx]
Y_hat_emot_pol_tr <- Y_hat_emot_pol[train_idx]
cf_k_pp <- causal_forest(X_pol_phys_tr[, selected_phys_pol], Y_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_phys_pol_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_phys_pol[val_idx] <- predict(cf_k_pp, X_pol_phys_va[, selected_phys_pol])$predictions
cf_k_cp <- causal_forest(X_pol_ctrl_tr[, selected_ctrl_pol], Y_ctrl_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_ctrl_pol_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_ctrl_pol[val_idx] <- predict(cf_k_cp, X_pol_ctrl_va[, selected_ctrl_pol])$predictions
cf_k_ep <- causal_forest(X_pol_emot_tr[, selected_emot_pol], Y_emot_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_emot_pol_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_emot_pol[val_idx] <- predict(cf_k_ep, X_pol_emot_va[, selected_emot_pol])$predictions
# Spec C: purely observable covariates
X_pol_obs_tr <- X_tr[, cf_vars_policy_obs]; X_pol_obs_va <- X_va[, cf_vars_policy_obs]
Y_hat_phys_obs_tr <- Y_hat_phys_obs[train_idx]
Y_hat_ctrl_obs_tr <- Y_hat_ctrl_obs[train_idx]
Y_hat_emot_obs_tr <- Y_hat_emot_obs[train_idx]
cf_k_po <- causal_forest(X_pol_obs_tr[, selected_phys_obs], Y_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_phys_obs_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_phys_obs[val_idx] <- predict(cf_k_po, X_pol_obs_va[, selected_phys_obs])$predictions
cf_k_co <- causal_forest(X_pol_obs_tr[, selected_ctrl_obs], Y_ctrl_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_ctrl_obs_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_ctrl_obs[val_idx] <- predict(cf_k_co, X_pol_obs_va[, selected_ctrl_obs])$predictions
cf_k_eo <- causal_forest(X_pol_obs_tr[, selected_emot_obs], Y_emot_tr, W_tr,
W.hat = W_hat_tr, Y.hat = Y_hat_emot_obs_tr,
num.trees = 2000, tune.parameters = "all")
oof_cate_emot_obs[val_idx] <- predict(cf_k_eo, X_pol_obs_va[, selected_emot_obs])$predictions
cat("Fold", k, "of", K, "complete\n")
}
## Fold 1 of 5 complete
## Fold 2 of 5 complete
## Fold 3 of 5 complete
## Fold 4 of 5 complete
## Fold 5 of 5 complete
cat("\nSpec A OOF CATE summary (Physical/Sexual):\n"); summary(oof_cate_phys)
##
## Spec A OOF CATE summary (Physical/Sexual):
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.07316 -0.07059 -0.06403 -0.05586 -0.03794 -0.03361
cat("\nSpec B OOF CATE summary (Physical/Sexual):\n"); summary(oof_cate_phys_pol)
##
## Spec B OOF CATE summary (Physical/Sexual):
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.08510 -0.07344 -0.07216 -0.05944 -0.03844 -0.02871
cat("\nSpec C OOF CATE summary (Physical/Sexual):\n"); summary(oof_cate_phys_obs)
##
## Spec C OOF CATE summary (Physical/Sexual):
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.07133 -0.06273 -0.06011 -0.04890 -0.03542 -0.02107
cate_oof_df <- data.frame(
CATE = c(oof_cate_phys, oof_cate_phys_pol, oof_cate_phys_obs),
Specification = factor(rep(
c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)"),
each = n
), levels = c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)"))
)
cate_mean_df <- data.frame(
Specification = factor(
c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)"),
levels = c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)")
),
Mean = c(mean(oof_cate_phys), mean(oof_cate_phys_pol), mean(oof_cate_phys_obs))
)
ggplot(cate_oof_df, aes(x = CATE)) +
geom_density(fill = "steelblue", alpha = 0.4) +
geom_vline(xintercept = 0, color = "blue", linetype = "dotted") +
geom_vline(data = cate_mean_df, aes(xintercept = Mean), color = "red", linetype = "dotted") +
facet_wrap(~ Specification, scales = "free") +
labs(title = "Out-of-Fold CATE Distributions (Physical/Sexual Violence)",
subtitle = "Blue dotted = zero | Red dotted = mean CATE",
x = "Treatment Effect", y = "Density") +
theme_minimal()
# OOB predictions with variance estimates from the full-data forest
oob_ci <- predict(cf, estimate.variance = TRUE)
oob_cate_phys <- oob_ci$predictions
sigma_hat <- sqrt(oob_ci$variance.estimates)
ord <- order(oob_cate_phys)
ci_df <- data.frame(
rank = seq_along(ord),
cate = oob_cate_phys[ord],
lower = (oob_cate_phys - 1.96 * sigma_hat)[ord],
upper = (oob_cate_phys + 1.96 * sigma_hat)[ord]
)
ggplot(ci_df, aes(x = rank, y = cate)) +
geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.5) +
geom_line(color = "black") +
geom_hline(yintercept = 0, color = "blue", linetype = "dotted") +
geom_hline(yintercept = mean(oob_cate_phys), color = "red", linetype = "dotted") +
labs(title = "Individual CATEs with 95% CI (OOB, Full Data)",
subtitle = "Blue dotted = zero | Red dotted = mean CATE",
x = "Observation (sorted by CATE)", y = "Treatment Effect") +
theme_minimal()
m_phys <- lm(as.formula(paste("el_physical ~ p_treat + bl_physical +", controls_formula)), data = dfa)
m_ctrl_lpm <- lm(as.formula(paste("el_control ~ p_treat + bl_control +", controls_formula)), data = dfa)
m_emot_lpm <- lm(as.formula(paste("el_emotional ~ p_treat + bl_emotional +", controls_formula)), data = dfa)
se_phys <- sqrt(diag(vcovCL(m_phys, cluster = dfa$bl_a05)))["p_treat"]
se_ctrl_lpm <- sqrt(diag(vcovCL(m_ctrl_lpm, cluster = dfa$bl_a05)))["p_treat"]
se_emot_lpm <- sqrt(diag(vcovCL(m_emot_lpm, cluster = dfa$bl_a05)))["p_treat"]
comparison_table <- data.frame(
Outcome = c("Physical/Sexual Violence", "Controlling Behaviors", "Emotional Violence"),
OLS_ATE = round(c(coef(m_phys)["p_treat"], coef(m_ctrl_lpm)["p_treat"], coef(m_emot_lpm)["p_treat"]), 4),
OLS_SE = round(c(se_phys, se_ctrl_lpm, se_emot_lpm), 4),
CF_ATE = round(c(ate_all[1], ate_ctrl[1], ate_emot[1]), 4),
CF_SE = round(c(ate_all[2], ate_ctrl[2], ate_emot[2]), 4),
Control_Mean = round(c(
mean(dfa$el_physical[dfa$p_treat == 0], na.rm = TRUE),
mean(dfa$el_control[dfa$p_treat == 0], na.rm = TRUE),
mean(dfa$el_emotional[dfa$p_treat == 0], na.rm = TRUE)
), 3)
)
comparison_table$OLS_pct_reduction <- round(comparison_table$OLS_ATE / comparison_table$Control_Mean * 100, 1)
comparison_table$CF_pct_reduction <- round(comparison_table$CF_ATE / comparison_table$Control_Mean * 100, 1)
knitr::kable(comparison_table,
col.names = c("Outcome", "OLS ATE", "OLS SE", "CF ATE", "CF SE",
"Control Mean", "OLS % Reduction", "CF % Reduction"))
| Outcome | OLS ATE | OLS SE | CF ATE | CF SE | Control Mean | OLS % Reduction | CF % Reduction |
|---|---|---|---|---|---|---|---|
| Physical/Sexual Violence | -0.0601 | 0.0333 | -0.0577 | 0.0239 | 0.197 | -30.5 | -29.3 |
| Controlling Behaviors | -0.0679 | 0.0335 | -0.0697 | 0.0293 | 0.362 | -18.8 | -19.3 |
| Emotional Violence | -0.0398 | 0.0344 | -0.0464 | 0.0287 | 0.351 | -11.3 | -13.2 |
# Specification A vs B vs C comparison
spec_comparison <- data.frame(
Outcome = c("Physical/Sexual Violence", "Controlling Behaviors", "Emotional Violence"),
Spec_A_ATE = round(c(ate_all[1], ate_ctrl[1], ate_emot[1]), 4),
Spec_A_SE = round(c(ate_all[2], ate_ctrl[2], ate_emot[2]), 4),
Spec_B_ATE = round(c(ate_all_pol[1], ate_ctrl_pol[1], ate_emot_pol[1]), 4),
Spec_B_SE = round(c(ate_all_pol[2], ate_ctrl_pol[2], ate_emot_pol[2]), 4),
Spec_C_ATE = round(c(ate_all_obs[1], ate_ctrl_obs[1], ate_emot_obs[1]), 4),
Spec_C_SE = round(c(ate_all_obs[2], ate_ctrl_obs[2], ate_emot_obs[2]), 4)
)
knitr::kable(spec_comparison,
col.names = c("Outcome", "Spec A ATE", "Spec A SE", "Spec B ATE", "Spec B SE",
"Spec C ATE", "Spec C SE"),
caption = "Specification A vs B vs C: Effect of Covariate Restrictions")
| Outcome | Spec A ATE | Spec A SE | Spec B ATE | Spec B SE | Spec C ATE | Spec C SE |
|---|---|---|---|---|---|---|
| Physical/Sexual Violence | -0.0577 | 0.0239 | -0.0603 | 0.0240 | -0.0487 | 0.0252 |
| Controlling Behaviors | -0.0697 | 0.0293 | -0.0621 | 0.0294 | -0.0694 | 0.0302 |
| Emotional Violence | -0.0464 | 0.0287 | -0.0485 | 0.0290 | -0.0419 | 0.0303 |
Both the OLS and the Causal Forest should agree on the direction and rough magnitude of the treatment effects across all three outcomes, since this is an RCT and both methods should recover the same average effect under randomization. The causal forest now uses the Athey/Wager two-step pipeline (propensity orthogonalization + outcome-based variable selection), which should improve efficiency. Compare the CF ATE estimates with the OLS estimates — they should be close, which confirms the forest is well-calibrated at the average level.
cat("=== Specification A ===")
## === Specification A ===
cat("\n--- Best Linear Projection: Physical/Sexual Violence ---\n")
##
## --- Best Linear Projection: Physical/Sexual Violence ---
blp_phys <- best_linear_projection(cf, X[, selected_phys])
print(blp_phys)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1000044 0.1235572 -0.8094 0.41846
## bl_age -0.0017336 0.0043375 -0.3997 0.68946
## bl_married 0.0918910 0.0512166 1.7942 0.07304 .
## bl_partn_age 0.0011980 0.0043217 0.2772 0.78168
## bl_nchildren0t5 0.0208956 0.0437058 0.4781 0.63267
## bl_nchildren6t14 -0.0178244 0.0239639 -0.7438 0.45714
## bl_asset_index 0.0086730 0.0152429 0.5690 0.56947
## norooms 0.0841146 0.0850259 0.9893 0.32272
## bl_physical 0.0792716 0.1020392 0.7769 0.43738
## bl_emotional -0.0148248 0.0708142 -0.2093 0.83421
## bl_control -0.0402676 0.0964423 -0.4175 0.67636
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Best Linear Projection: Controlling Behaviors ---\n")
##
## --- Best Linear Projection: Controlling Behaviors ---
blp_ctrl <- best_linear_projection(cf_ctrl, X[, selected_ctrl])
print(blp_ctrl)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.13218713 0.12621680 -1.0473 0.29517
## bl_age 0.00089949 0.00498699 0.1804 0.85689
## bl_married 0.00987188 0.06289379 0.1570 0.87530
## bl_partn_age -0.00196940 0.00469768 -0.4192 0.67512
## bl_nchildren0t5 0.08409471 0.04500581 1.8685 0.06193 .
## bl_nchildren6t14 0.03521191 0.02843995 1.2381 0.21591
## bl_asset_index 0.01008179 0.01635709 0.6164 0.53778
## bl_physical 0.16884871 0.10461778 1.6140 0.10680
## bl_emotional -0.09419647 0.09009391 -1.0455 0.29598
## bl_control 0.00584518 0.10249370 0.0570 0.95453
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Best Linear Projection: Emotional Violence ---\n")
##
## --- Best Linear Projection: Emotional Violence ---
blp_emot <- best_linear_projection(cf_emot, X[, selected_emot])
print(blp_emot)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1465476 0.1236409 -1.1853 0.23614
## bl_age -0.0028275 0.0051103 -0.5533 0.58016
## bl_married 0.0739129 0.0650190 1.1368 0.25585
## bl_partn_age 0.0031890 0.0048066 0.6635 0.50717
## bl_nchildren0t5 0.0856955 0.0443821 1.9309 0.05373 .
## bl_nchildren6t14 -0.0162518 0.0290938 -0.5586 0.57654
## bl_asset_index 0.0167930 0.0165620 1.0140 0.31081
## bl_carchi -0.0508362 0.0624856 -0.8136 0.41605
## bl_physical 0.0893699 0.1054841 0.8472 0.39703
## bl_emotional -0.0528201 0.0910615 -0.5800 0.56199
## bl_control 0.0380223 0.1014398 0.3748 0.70786
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n=== Specification B (Policy-Relevant) ===")
##
## === Specification B (Policy-Relevant) ===
cat("\n--- Best Linear Projection: Physical/Sexual Violence ---\n")
##
## --- Best Linear Projection: Physical/Sexual Violence ---
blp_phys_pol <- best_linear_projection(cf_pol, X_pol_phys[, selected_phys_pol])
print(blp_phys_pol)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.10841743 0.12496562 -0.8676 0.38580
## bl_age -0.00086593 0.00434239 -0.1994 0.84197
## bl_married 0.09901641 0.05113983 1.9362 0.05308 .
## bl_partn_age 0.00035772 0.00438609 0.0816 0.93501
## bl_nchildren0t5 0.02229131 0.04473826 0.4983 0.61839
## bl_nchildren6t14 -0.02094168 0.02388605 -0.8767 0.38080
## bl_asset_index 0.00976418 0.01553441 0.6286 0.52976
## norooms 0.08539786 0.08636388 0.9888 0.32295
## bl_physical 0.05196461 0.08246506 0.6301 0.52872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Best Linear Projection: Controlling Behaviors ---\n")
##
## --- Best Linear Projection: Controlling Behaviors ---
blp_ctrl_pol <- best_linear_projection(cf_ctrl_pol, X_pol_ctrl[, selected_ctrl_pol])
print(blp_ctrl_pol)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2352333 0.1664114 -1.4136 0.15775
## bl_colom 0.0165207 0.0707558 0.2335 0.81542
## bl_age 0.0019622 0.0051679 0.3797 0.70424
## bl_edu_sec 0.1061002 0.0710683 1.4929 0.13572
## bl_married 0.0150949 0.0686614 0.2198 0.82603
## bl_dempl -0.0671536 0.0644579 -1.0418 0.29770
## bl_partn_age -0.0016009 0.0047794 -0.3350 0.73771
## bl_nchildren0t5 0.1010507 0.0466592 2.1657 0.03053 *
## bl_nchildren6t14 0.0457307 0.0295246 1.5489 0.12167
## bl_asset_index 0.0048592 0.0192305 0.2527 0.80056
## norooms 0.0528069 0.1011868 0.5219 0.60185
## bl_carchi 0.0104220 0.0660645 0.1578 0.87468
## bl_control 0.0246849 0.0864850 0.2854 0.77537
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Best Linear Projection: Emotional Violence ---\n")
##
## --- Best Linear Projection: Emotional Violence ---
blp_emot_pol <- best_linear_projection(cf_emot_pol, X_pol_emot[, selected_emot_pol])
print(blp_emot_pol)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1472679 0.1249143 -1.1790 0.23865
## bl_age -0.0022061 0.0050892 -0.4335 0.66475
## bl_married 0.0631691 0.0649907 0.9720 0.33126
## bl_partn_age 0.0029449 0.0047976 0.6138 0.53944
## bl_nchildren0t5 0.0830642 0.0444881 1.8671 0.06213 .
## bl_nchildren6t14 -0.0139069 0.0288333 -0.4823 0.62967
## bl_asset_index 0.0165902 0.0168101 0.9869 0.32388
## bl_carchi -0.0537107 0.0628015 -0.8552 0.39258
## bl_emotional -0.0034706 0.0704287 -0.0493 0.96071
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n=== Specification C (Purely Observable) ===")
##
## === Specification C (Purely Observable) ===
cat("\n--- Best Linear Projection: Physical/Sexual Violence ---\n")
##
## --- Best Linear Projection: Physical/Sexual Violence ---
blp_phys_obs <- best_linear_projection(cf_phys_obs, X_pol_obs[, selected_phys_obs])
print(blp_phys_obs)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.18685174 0.14521247 -1.2867 0.1984
## bl_colom -0.00023180 0.06074368 -0.0038 0.9970
## bl_age 0.00020569 0.00442750 0.0465 0.9630
## bl_married 0.07179208 0.05683713 1.2631 0.2068
## bl_black 0.04538864 0.10031477 0.4525 0.6510
## bl_dempl -0.04798959 0.05710993 -0.8403 0.4009
## bl_partn_sedu 0.09204372 0.05626684 1.6358 0.1021
## bl_partn_age 0.00127089 0.00426719 0.2978 0.7659
## bl_nchildren0t5 0.03654385 0.04621558 0.7907 0.4293
## bl_nchildren6t14 -0.01403080 0.02558415 -0.5484 0.5835
## bl_asset_index 0.00124473 0.01665725 0.0747 0.9404
## norooms 0.09409778 0.08851783 1.0630 0.2880
cat("\n--- Best Linear Projection: Controlling Behaviors ---\n")
##
## --- Best Linear Projection: Controlling Behaviors ---
blp_ctrl_obs <- best_linear_projection(cf_ctrl_obs, X_pol_obs[, selected_ctrl_obs])
print(blp_ctrl_obs)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2328186 0.1751592 -1.3292 0.1840385
## bl_head 0.0739068 0.2507252 0.2948 0.7682186
## bl_colom 0.0138856 0.0738598 0.1880 0.8509087
## bl_age 0.0057809 0.0054005 1.0705 0.2846302
## bl_edu_sec 0.1224426 0.0798524 1.5334 0.1254487
## bl_married 0.0014817 0.0709355 0.0209 0.9833383
## bl_indig -0.4989072 0.1507094 -3.3104 0.0009591 ***
## bl_black 0.1127024 0.1173344 0.9605 0.3369840
## bl_dempl -0.0694329 0.0663394 -1.0466 0.2954790
## ownhouse -0.0213614 0.1394939 -0.1531 0.8783172
## bl_partn_sedu -0.0247578 0.0770955 -0.3211 0.7481664
## bl_partn_age -0.0048983 0.0051668 -0.9480 0.3432972
## bl_nchildren0t5 0.1107183 0.0491776 2.2514 0.0245394 *
## bl_nchildren6t14 0.0453585 0.0302079 1.5015 0.1334759
## bl_asset_index 0.0024416 0.0203893 0.1197 0.9047035
## norooms 0.0783139 0.1030734 0.7598 0.4475297
## bl_carchi 0.0153411 0.0697435 0.2200 0.8259357
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n--- Best Linear Projection: Emotional Violence ---\n")
##
## --- Best Linear Projection: Emotional Violence ---
blp_emot_obs <- best_linear_projection(cf_emot_obs, X_pol_obs[, selected_emot_obs])
print(blp_emot_obs)
##
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3016051 0.1762209 -1.7115 0.08724 .
## bl_colom 0.0322986 0.0730214 0.4423 0.65834
## bl_age 0.0015419 0.0056732 0.2718 0.78584
## bl_edu_sec 0.0999851 0.0796701 1.2550 0.20972
## bl_married 0.0680858 0.0708152 0.9615 0.33651
## bl_dempl -0.0811585 0.0666257 -1.2181 0.22341
## bl_partn_sedu 0.0540369 0.0777410 0.6951 0.48713
## bl_partn_age 0.0014708 0.0053866 0.2730 0.78486
## bl_nchildren0t5 0.1059694 0.0497525 2.1299 0.03338 *
## bl_nchildren6t14 -0.0075796 0.0309947 -0.2445 0.80685
## bl_asset_index 0.0110714 0.0203927 0.5429 0.58729
## norooms 0.0781911 0.1042103 0.7503 0.45321
## bl_carchi -0.0326712 0.0693800 -0.4709 0.63780
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The BLP results help us understand which covariates are associated with treatment effect heterogeneity — basically for whom the program works better or worse. Note that the BLP now only includes the selected covariates from the outcome regression forest pre-selection step, so the set of variables examined may differ across outcomes.
quartile_analysis <- function(Y_vec, W_vec, cate_preds, outcome_name) {
q25 <- quantile(cate_preds, 0.25)
q75 <- quantile(cate_preds, 0.75)
ate_low <- mean(Y_vec[W_vec == 1 & cate_preds <= q25]) -
mean(Y_vec[W_vec == 0 & cate_preds <= q25])
ate_mid <- mean(Y_vec[W_vec == 1 & cate_preds > q25 & cate_preds <= q75]) -
mean(Y_vec[W_vec == 0 & cate_preds > q25 & cate_preds <= q75])
ate_high <- mean(Y_vec[W_vec == 1 & cate_preds > q75]) -
mean(Y_vec[W_vec == 0 & cate_preds > q75])
data.frame(Outcome = outcome_name,
Q1_lowest = round(ate_low, 4),
Q2_Q3_middle = round(ate_mid, 4),
Q4_highest = round(ate_high, 4),
Spread = round(ate_high - ate_low, 4))
}
# Spec A: using out-of-fold predictions from k-fold CV
q_table <- rbind(
quartile_analysis(Y, W, oof_cate_phys, "Physical/Sexual"),
quartile_analysis(Y_ctrl, W, oof_cate_ctrl, "Controlling"),
quartile_analysis(Y_emot, W, oof_cate_emot, "Emotional")
)
knitr::kable(q_table, caption = "Spec A: Realized ATEs by Predicted CATE Quartile (K-Fold OOF)")
| Outcome | Q1_lowest | Q2_Q3_middle | Q4_highest | Spread |
|---|---|---|---|---|
| Physical/Sexual | 0.0141 | -0.0400 | -0.1521 | -0.1662 |
| Controlling | -0.0389 | -0.0274 | -0.1875 | -0.1486 |
| Emotional | 0.0065 | -0.0741 | 0.0345 | 0.0280 |
# Spec B: policy-relevant
q_table_pol <- rbind(
quartile_analysis(Y, W, oof_cate_phys_pol, "Physical/Sexual"),
quartile_analysis(Y_ctrl, W, oof_cate_ctrl_pol, "Controlling"),
quartile_analysis(Y_emot, W, oof_cate_emot_pol, "Emotional")
)
knitr::kable(q_table_pol, caption = "Spec B (Policy): Realized ATEs by Predicted CATE Quartile (K-Fold OOF)")
| Outcome | Q1_lowest | Q2_Q3_middle | Q4_highest | Spread |
|---|---|---|---|---|
| Physical/Sexual | 0.0141 | -0.0324 | -0.1521 | -0.1662 |
| Controlling | -0.0461 | -0.0770 | -0.1111 | -0.0650 |
| Emotional | -0.0263 | -0.0410 | -0.0539 | -0.0276 |
# Spec C: purely observable
q_table_obs <- rbind(
quartile_analysis(Y, W, oof_cate_phys_obs, "Physical/Sexual"),
quartile_analysis(Y_ctrl, W, oof_cate_ctrl_obs, "Controlling"),
quartile_analysis(Y_emot, W, oof_cate_emot_obs, "Emotional")
)
knitr::kable(q_table_obs, caption = "Spec C (Observable): Realized ATEs by Predicted CATE Quartile (K-Fold OOF)")
| Outcome | Q1_lowest | Q2_Q3_middle | Q4_highest | Spread |
|---|---|---|---|---|
| Physical/Sexual | 0.0246 | -0.0412 | -0.1609 | -0.1855 |
| Controlling | -0.0379 | -0.0757 | -0.1541 | -0.1162 |
| Emotional | -0.0195 | -0.0742 | 0.0075 | 0.0270 |
# CATE heterogeneity summary across all three specifications
cate_summary <- data.frame(
Specification = rep(c("A (Full)", "B (Policy)", "C (Observable)"), each = 3),
Outcome = rep(c("Physical/Sexual", "Controlling", "Emotional"), 3),
Mean = round(c(mean(oof_cate_phys), mean(oof_cate_ctrl), mean(oof_cate_emot),
mean(oof_cate_phys_pol), mean(oof_cate_ctrl_pol), mean(oof_cate_emot_pol),
mean(oof_cate_phys_obs), mean(oof_cate_ctrl_obs), mean(oof_cate_emot_obs)), 4),
SD = round(c(sd(oof_cate_phys), sd(oof_cate_ctrl), sd(oof_cate_emot),
sd(oof_cate_phys_pol), sd(oof_cate_ctrl_pol), sd(oof_cate_emot_pol),
sd(oof_cate_phys_obs), sd(oof_cate_ctrl_obs), sd(oof_cate_emot_obs)), 4),
Min = round(c(min(oof_cate_phys), min(oof_cate_ctrl), min(oof_cate_emot),
min(oof_cate_phys_pol), min(oof_cate_ctrl_pol), min(oof_cate_emot_pol),
min(oof_cate_phys_obs), min(oof_cate_ctrl_obs), min(oof_cate_emot_obs)), 4),
Max = round(c(max(oof_cate_phys), max(oof_cate_ctrl), max(oof_cate_emot),
max(oof_cate_phys_pol), max(oof_cate_ctrl_pol), max(oof_cate_emot_pol),
max(oof_cate_phys_obs), max(oof_cate_ctrl_obs), max(oof_cate_emot_obs)), 4),
IQR = round(c(IQR(oof_cate_phys), IQR(oof_cate_ctrl), IQR(oof_cate_emot),
IQR(oof_cate_phys_pol), IQR(oof_cate_ctrl_pol), IQR(oof_cate_emot_pol),
IQR(oof_cate_phys_obs), IQR(oof_cate_ctrl_obs), IQR(oof_cate_emot_obs)), 4)
)
knitr::kable(cate_summary, caption = "CATE Distribution Summary Across Specifications (K-Fold OOF)")
| Specification | Outcome | Mean | SD | Min | Max | IQR |
|---|---|---|---|---|---|---|
| A (Full) | Physical/Sexual | -0.0559 | 0.0167 | -0.0732 | -0.0336 | 0.0326 |
| A (Full) | Controlling | -0.0710 | 0.0211 | -0.1083 | -0.0098 | 0.0408 |
| A (Full) | Emotional | -0.0464 | 0.0225 | -0.1089 | 0.0705 | 0.0280 |
| B (Policy) | Physical/Sexual | -0.0594 | 0.0218 | -0.0851 | -0.0287 | 0.0350 |
| B (Policy) | Controlling | -0.0640 | 0.0243 | -0.1691 | 0.0217 | 0.0416 |
| B (Policy) | Emotional | -0.0448 | 0.0159 | -0.0870 | 0.0275 | 0.0163 |
| C (Observable) | Physical/Sexual | -0.0489 | 0.0175 | -0.0713 | -0.0211 | 0.0273 |
| C (Observable) | Controlling | -0.0723 | 0.0152 | -0.0898 | 0.0036 | 0.0117 |
| C (Observable) | Emotional | -0.0422 | 0.0287 | -0.1439 | 0.0501 | 0.0311 |
cf_data_plot <- cf_data
cf_data_plot$cate_phys <- oof_cate_phys
cf_data_plot$cate_group <- cut(oof_cate_phys,
breaks = quantile(oof_cate_phys, c(0, 0.25, 0.75, 1)),
labels = c("Most helped", "Middle", "Least helped"),
include.lowest = TRUE)
profile <- cf_data_plot %>%
group_by(cate_group) %>%
summarise(
n = n(),
age = round(mean(bl_age, na.rm = TRUE), 1),
pct_secondary_edu = round(mean(bl_edu_sec, na.rm = TRUE) * 100, 1),
pct_colombian = round(mean(bl_colom, na.rm = TRUE) * 100, 1),
pct_indigenous = round(mean(bl_indig, na.rm = TRUE) * 100, 1),
pct_married = round(mean(bl_married, na.rm = TRUE) * 100, 1),
pct_baseline_violence = round(mean(bl_physical, na.rm = TRUE) * 100, 1),
mean_asset_index = round(mean(bl_asset_index, na.rm = TRUE), 3),
.groups = "drop"
)
knitr::kable(profile, caption = "Profile of Women by CATE Group (Physical/Sexual Violence)")
| cate_group | n | age | pct_secondary_edu | pct_colombian | pct_indigenous | pct_married | pct_baseline_violence | mean_asset_index |
|---|---|---|---|---|---|---|---|---|
| Most helped | 307 | 36.2 | 38.8 | 34.2 | 3.6 | 44.0 | 14.7 | 0.883 |
| Middle | 674 | 34.1 | 40.7 | 34.1 | 4.5 | 40.5 | 16.6 | 0.270 |
| Least helped | 245 | 35.2 | 32.2 | 38.0 | 3.7 | 45.3 | 16.7 | 0.331 |
The quartile table tells us whether the causal forest is picking up meaningful heterogeneity or just noise. Look at the spread between the top and bottom quartile — a larger spread suggests meaningful heterogeneity detection. With k-fold CV, every observation has a truly out-of-sample CATE prediction, avoiding overfitting concerns. Compare Spec A and Spec B: if the quartile spread shrinks substantially in Spec B, the cross-outcome baseline IPV variables were important heterogeneity drivers (but not actionable for policy).
The profile table helps us see who falls into each CATE group. This is useful for understanding the practical implications of the heterogeneity — if we were to target the intervention, what kind of women should we prioritize?
cat("=== Specification A ===\n")
## === Specification A ===
test_cal <- test_calibration(cf)
print(test_cal)
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 0.99996 0.23897 4.1845 1.531e-05 ***
## differential.forest.prediction -419.90640 12.83985 -32.7034 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n=== Specification B (Policy-Relevant) ===\n")
##
## === Specification B (Policy-Relevant) ===
test_cal_pol <- test_calibration(cf_pol)
print(test_cal_pol)
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 1.07580 0.22989 4.6796 1.597e-06 ***
## differential.forest.prediction -460.88442 14.01695 -32.8805 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\n=== Specification C (Purely Observable) ===\n")
##
## === Specification C (Purely Observable) ===
test_cal_obs <- test_calibration(cf_phys_obs)
print(test_cal_obs)
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 0.60326 0.30567 1.9736 0.02433 *
## differential.forest.prediction -385.32525 14.41575 -26.7295 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The calibration test is key because it tells us whether the causal
forest is detecting real heterogeneity or just overfitting to noise. We
look at two things: the mean.forest.prediction coefficient
tells us if the average prediction is well-calibrated (we want it close
to 1 and significant), and the
differential.forest.prediction tells us if there is actual
heterogeneity. With the refined pipeline (orthogonalized W.hat and
Y.hat, selected covariates), the calibration test may show improved
results compared to the baseline approach. Examine the p-values in the
output above.
vi_phys <- variable_importance(cf)
vi_ctrl <- variable_importance(cf_ctrl)
vi_emot <- variable_importance(cf_emot)
# Diagnostic: check whether the causal forests produced any splits
# If tune.parameters = "all" detects no heterogeneity, it may set min.node.size
# so high that zero splits occur, resulting in zero variable importance.
cat("=== Causal Forest Split Diagnostics (Spec A) ===\n")
## === Causal Forest Split Diagnostics (Spec A) ===
cat("Physical — total importance:", sum(vi_phys),
"| max importance:", max(vi_phys),
"| non-zero vars:", sum(vi_phys > 0), "of", length(vi_phys), "\n")
## Physical — total importance: 0 | max importance: 0 | non-zero vars: 0 of 10
cat("Controlling — total importance:", sum(vi_ctrl),
"| max importance:", max(vi_ctrl),
"| non-zero vars:", sum(vi_ctrl > 0), "of", length(vi_ctrl), "\n")
## Controlling — total importance: 1 | max importance: 0.2757637 | non-zero vars: 9 of 9
cat("Emotional — total importance:", sum(vi_emot),
"| max importance:", max(vi_emot),
"| non-zero vars:", sum(vi_emot > 0), "of", length(vi_emot), "\n")
## Emotional — total importance: 1 | max importance: 0.1847139 | non-zero vars: 10 of 10
if (sum(vi_phys) == 0) {
cat("\nNOTE: The Physical/Sexual violence causal forest has zero variable importance.\n")
cat("This means the forest detected no significant treatment effect heterogeneity\n")
cat("and tuned itself to not split. This is a genuine result, not a code error.\n")
cat("The forest effectively estimates a constant treatment effect for this outcome.\n")
}
##
## NOTE: The Physical/Sexual violence causal forest has zero variable importance.
## This means the forest detected no significant treatment effect heterogeneity
## and tuned itself to not split. This is a genuine result, not a code error.
## The forest effectively estimates a constant treatment effect for this outcome.
# Also compute unweighted split frequencies as an alternative diagnostic
# decay.exponent = 0 weights all splits equally regardless of depth
vi_phys_unw <- variable_importance(cf, decay.exponent = 0, max.depth = 4)
cat("\nPhysical unweighted importance (decay.exponent=0):\n")
##
## Physical unweighted importance (decay.exponent=0):
print(round(as.numeric(vi_phys_unw), 6))
## [1] 0 0 0 0 0 0 0 0 0 0
# Each forest uses different selected variables — create separate importance data frames
vi_phys_df <- data.frame(variable = names(X)[selected_phys],
Importance = round(as.numeric(vi_phys), 6))
vi_ctrl_df <- data.frame(variable = names(X)[selected_ctrl],
Importance = round(as.numeric(vi_ctrl), 6))
vi_emot_df <- data.frame(variable = names(X)[selected_emot],
Importance = round(as.numeric(vi_emot), 6))
# Wide format summary table
vi_wide <- vi_phys_df %>%
rename(Physical = Importance) %>%
full_join(vi_ctrl_df %>% rename(Controlling = Importance), by = "variable") %>%
full_join(vi_emot_df %>% rename(Emotional = Importance), by = "variable") %>%
arrange(desc(Physical))
knitr::kable(vi_wide, caption = "Causal Forest Variable Importance (Selected Variables Only)")
| variable | Physical | Controlling | Emotional |
|---|---|---|---|
| bl_age | 0 | 0.216908 | 0.165550 |
| bl_married | 0 | 0.036659 | 0.059608 |
| bl_partn_age | 0 | 0.189924 | 0.172988 |
| bl_nchildren0t5 | 0 | 0.160500 | 0.121671 |
| bl_nchildren6t14 | 0 | 0.087424 | 0.106971 |
| bl_asset_index | 0 | 0.275764 | 0.184714 |
| norooms | 0 | NA | NA |
| bl_physical | 0 | 0.002715 | 0.034661 |
| bl_emotional | 0 | 0.022640 | 0.051915 |
| bl_control | 0 | 0.007466 | 0.038109 |
| bl_carchi | NA | NA | 0.063814 |
ggplot(vi_phys_df %>% arrange(desc(Importance)) %>% head(10),
aes(x = reorder(variable, Importance), y = Importance)) +
geom_col(fill = "steelblue", alpha = 0.8) +
coord_flip() +
labs(title = "Top Variables Driving Heterogeneity (Physical/Sexual Violence)",
subtitle = "Specification A",
x = "", y = "Importance") +
theme_minimal()
# Specification C: causal forest variable importance
vi_phys_obs_cf <- variable_importance(cf_phys_obs)
vi_ctrl_obs_cf <- variable_importance(cf_ctrl_obs)
vi_emot_obs_cf <- variable_importance(cf_emot_obs)
cat("\n=== Causal Forest Split Diagnostics (Spec C) ===\n")
##
## === Causal Forest Split Diagnostics (Spec C) ===
cat("Physical — total importance:", sum(vi_phys_obs_cf),
"| max importance:", max(vi_phys_obs_cf),
"| non-zero vars:", sum(vi_phys_obs_cf > 0), "of", length(vi_phys_obs_cf), "\n")
## Physical — total importance: 0 | max importance: 0 | non-zero vars: 0 of 11
cat("Controlling — total importance:", sum(vi_ctrl_obs_cf),
"| max importance:", max(vi_ctrl_obs_cf),
"| non-zero vars:", sum(vi_ctrl_obs_cf > 0), "of", length(vi_ctrl_obs_cf), "\n")
## Controlling — total importance: 0.9560976 | max importance: 0.1682071 | non-zero vars: 12 of 16
cat("Emotional — total importance:", sum(vi_emot_obs_cf),
"| max importance:", max(vi_emot_obs_cf),
"| non-zero vars:", sum(vi_emot_obs_cf > 0), "of", length(vi_emot_obs_cf), "\n")
## Emotional — total importance: 1 | max importance: 0.1648512 | non-zero vars: 12 of 12
if (sum(vi_phys_obs_cf) == 0) {
cat("\nNOTE: Spec C Physical/Sexual violence causal forest also has zero importance.\n")
cat("Confirms no detectable heterogeneity for this outcome with observable covariates.\n")
}
##
## NOTE: Spec C Physical/Sexual violence causal forest also has zero importance.
## Confirms no detectable heterogeneity for this outcome with observable covariates.
vi_phys_obs_cf_df <- data.frame(variable = names(X_pol_obs)[selected_phys_obs],
Importance = round(as.numeric(vi_phys_obs_cf), 6))
vi_ctrl_obs_cf_df <- data.frame(variable = names(X_pol_obs)[selected_ctrl_obs],
Importance = round(as.numeric(vi_ctrl_obs_cf), 6))
vi_emot_obs_cf_df <- data.frame(variable = names(X_pol_obs)[selected_emot_obs],
Importance = round(as.numeric(vi_emot_obs_cf), 6))
vi_wide_obs <- vi_phys_obs_cf_df %>%
rename(Physical = Importance) %>%
full_join(vi_ctrl_obs_cf_df %>% rename(Controlling = Importance), by = "variable") %>%
full_join(vi_emot_obs_cf_df %>% rename(Emotional = Importance), by = "variable") %>%
arrange(desc(Physical))
knitr::kable(vi_wide_obs, caption = "Causal Forest Variable Importance — Specification C (Purely Observable)")
| variable | Physical | Controlling | Emotional |
|---|---|---|---|
| bl_colom | 0 | 0.053153 | 0.046467 |
| bl_age | 0 | 0.162344 | 0.141687 |
| bl_married | 0 | 0.052089 | 0.045651 |
| bl_black | 0 | 0.000000 | NA |
| bl_dempl | 0 | 0.046696 | 0.055330 |
| bl_partn_sedu | 0 | 0.050825 | 0.063719 |
| bl_partn_age | 0 | 0.159405 | 0.153237 |
| bl_nchildren0t5 | 0 | 0.067308 | 0.105380 |
| bl_nchildren6t14 | 0 | 0.078727 | 0.083660 |
| bl_asset_index | 0 | 0.168207 | 0.164851 |
| norooms | 0 | 0.000540 | 0.021985 |
| bl_head | NA | 0.000000 | NA |
| bl_edu_sec | NA | 0.062601 | 0.066964 |
| bl_indig | NA | 0.000000 | NA |
| ownhouse | NA | 0.000000 | NA |
| bl_carchi | NA | 0.054201 | 0.051068 |
ggplot(vi_phys_obs_cf_df %>% arrange(desc(Importance)) %>% head(10),
aes(x = reorder(variable, Importance), y = Importance)) +
geom_col(fill = "darkgreen", alpha = 0.8) +
coord_flip() +
labs(title = "Top Variables Driving Heterogeneity (Physical/Sexual Violence)",
subtitle = "Specification C (Purely Observable)",
x = "", y = "Importance") +
theme_minimal()
The variable importance from causal forests measures which covariates drive treatment effect heterogeneity — this is conceptually different from the outcome regression forest importance in Step 2 (which measures covariates that predict IPV levels). A variable can be a strong predictor of outcomes without driving heterogeneous treatment effects. If the causal forest importance is zero for physical/sexual violence, it means the forest detected no significant heterogeneity in the treatment effect for that outcome and tuned itself to produce essentially no splits. This is a genuine finding — the treatment effect on physical violence is approximately constant across the covariate space — and is consistent with the calibration test results and the original paper’s finding of a small, borderline-significant effect on physical violence.
Since this is an RCT, the propensity scores should be approximately constant across individuals and the distributions for treated and control groups should overlap nearly perfectly. This is a basic sanity check — any separation would signal problems with the randomization.
overlap_df <- data.frame(
W_hat = W_hat,
Treatment = factor(W, labels = c("Control", "Treated"))
)
ggplot(overlap_df, aes(x = W_hat, fill = Treatment)) +
geom_density(alpha = 0.5) +
geom_vline(xintercept = mean(W), lty = 2, col = "black") +
labs(title = "Propensity Score Distribution by Treatment Status",
subtitle = "RCT: distributions should overlap almost perfectly",
x = "Estimated Propensity Score (W.hat)", y = "Density") +
theme_minimal() +
scale_fill_brewer(palette = "Set1")
cat("Propensity score range (Control):",
round(range(W_hat[W == 0]), 4), "\n")
## Propensity score range (Control): 0.6157 0.8293
cat("Propensity score range (Treated):",
round(range(W_hat[W == 1]), 4), "\n")
## Propensity score range (Treated): 0.6114 0.8387
cat("Mean propensity score (Control):",
round(mean(W_hat[W == 0]), 4), "\n")
## Mean propensity score (Control): 0.7117
cat("Mean propensity score (Treated):",
round(mean(W_hat[W == 1]), 4), "\n")
## Mean propensity score (Treated): 0.7211
The propensity scores should cluster tightly around 0.719 for both groups. Any meaningful separation would be a red flag, but in this RCT context we expect near-perfect overlap, confirming that the orthogonalization with W.hat is appropriate.
We randomly shuffle the treatment assignment and re-estimate the causal forest for physical/sexual violence. Under the null of no treatment effect and no heterogeneity, the resulting CATEs should collapse to near zero. If the placebo forest still finds large treatment effects or heterogeneity, it would suggest our main results are driven by noise.
set.seed(42)
# Shuffle treatment assignment
W_placebo <- sample(W)
# Re-estimate propensity and outcome forests with placebo W
rf_W_placebo <- regression_forest(X, W_placebo, tune.parameters = "all")
W_hat_placebo <- predict(rf_W_placebo)$predictions
cf_placebo <- causal_forest(X[, selected_phys], Y, W_placebo,
W.hat = W_hat_placebo,
Y.hat = Y_hat_phys,
num.trees = 4000,
tune.parameters = "all")
ate_placebo <- average_treatment_effect(cf_placebo, target.sample = "all")
cate_placebo <- predict(cf_placebo, X[, selected_phys])$predictions
# Spec C placebo
cf_placebo_obs <- causal_forest(X_pol_obs[, selected_phys_obs], Y, W_placebo,
W.hat = W_hat_placebo,
Y.hat = Y_hat_phys_obs,
num.trees = 4000,
tune.parameters = "all")
ate_placebo_obs <- average_treatment_effect(cf_placebo_obs, target.sample = "all")
cate_placebo_obs <- predict(cf_placebo_obs, X_pol_obs[, selected_phys_obs])$predictions
cat("=== Placebo Test (Spec A) ===\n")
## === Placebo Test (Spec A) ===
cat("Placebo ATE:", round(ate_placebo[1], 4), " SE:", round(ate_placebo[2], 4), "\n")
## Placebo ATE: 0.0097 SE: 0.022
cat("Placebo CATE range:", round(range(cate_placebo), 4), "\n")
## Placebo CATE range: 0.0096 0.0096
cat("Placebo CATE SD:", round(sd(cate_placebo), 4), "\n\n")
## Placebo CATE SD: 0
cat("=== Placebo Test (Spec C) ===\n")
## === Placebo Test (Spec C) ===
cat("Placebo ATE:", round(ate_placebo_obs[1], 4), " SE:", round(ate_placebo_obs[2], 4), "\n")
## Placebo ATE: 0.0162 SE: 0.0235
cat("Placebo CATE range:", round(range(cate_placebo_obs), 4), "\n")
## Placebo CATE range: 0.0148 0.0167
cat("Placebo CATE SD:", round(sd(cate_placebo_obs), 4), "\n\n")
## Placebo CATE SD: 3e-04
# Calibration test on placebo
cat("Placebo calibration test (Spec A):\n")
## Placebo calibration test (Spec A):
print(test_calibration(cf_placebo))
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 1.9065 1.5311 1.2452 0.1067
## differential.forest.prediction -426.8302 14.9996 -28.4560 1.0000
cat("\nPlacebo calibration test (Spec C):\n")
##
## Placebo calibration test (Spec C):
print(test_calibration(cf_placebo_obs))
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 1.6828 1.0053 1.6738 0.04721 *
## differential.forest.prediction -414.8744 14.6794 -28.2624 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare CATE distributions
par(mfrow = c(1, 3))
d_real <- density(oob_cate_phys)
d_plac <- density(cate_placebo)
d_plac_obs <- density(cate_placebo_obs)
xlims <- range(c(d_real$x, d_plac$x, d_plac_obs$x))
plot(d_real, main = "Real Treatment CATEs (Spec A)",
xlab = "Treatment Effect", xlim = xlims)
abline(v = 0, col = "blue", lty = 3)
abline(v = mean(oob_cate_phys), col = "red", lty = 3)
plot(d_plac, main = "Placebo CATEs (Spec A, Shuffled W)",
xlab = "Treatment Effect", xlim = xlims)
abline(v = 0, col = "blue", lty = 3)
abline(v = mean(cate_placebo), col = "red", lty = 3)
plot(d_plac_obs, main = "Placebo CATEs (Spec C, Shuffled W)",
xlab = "Treatment Effect", xlim = xlims)
abline(v = 0, col = "blue", lty = 3)
abline(v = mean(cate_placebo_obs), col = "red", lty = 3)
par(mfrow = c(1, 1))
If the pipeline is working correctly, the placebo ATE should be close to zero (within noise), the CATE distribution should be much tighter than the real one, and the calibration test should show no significance. This confirms that our main findings are not artefacts of the forest overfitting.
We compare our refined causal forest (with pre-selected covariates and orthogonalization) against a baseline forest using all 19 covariates without orthogonalization. If the results are qualitatively similar, it increases confidence. If the refined pipeline shows tighter estimates or better calibration, it demonstrates the value of the two-step approach.
set.seed(100241)
# Baseline causal forest: all covariates, no orthogonalization
cf_full <- causal_forest(X, Y, W,
num.trees = 4000,
sample.fraction = 0.5,
mtry = min(ceiling(sqrt(ncol(X)) + 20), ncol(X)),
min.node.size = 20,
honesty = TRUE,
honesty.fraction = 0.5,
honesty.prune.leaves = TRUE,
tune.parameters = "none",
compute.oob.predictions = TRUE)
ate_full <- average_treatment_effect(cf_full, target.sample = "all")
cate_full <- predict(cf_full)$predictions # OOB predictions
cat("=== Physical/Sexual Violence ===\n")
## === Physical/Sexual Violence ===
cat("Full covariates ATE:", round(ate_full[1], 4), " SE:", round(ate_full[2], 4), "\n")
## Full covariates ATE: -0.0583 SE: 0.0238
cat("Selected covariates (Spec A) ATE:", round(ate_all[1], 4), " SE:", round(ate_all[2], 4), "\n")
## Selected covariates (Spec A) ATE: -0.0577 SE: 0.0239
cat("Observable covariates (Spec C) ATE:", round(ate_all_obs[1], 4), " SE:", round(ate_all_obs[2], 4), "\n\n")
## Observable covariates (Spec C) ATE: -0.0487 SE: 0.0252
# Calibration comparison
cat("Calibration test — Full covariates:\n")
## Calibration test — Full covariates:
print(test_calibration(cf_full))
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 0.97190 0.40534 2.3978 0.008322 **
## differential.forest.prediction -0.89774 1.40927 -0.6370 0.737884
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\nCalibration test — Selected covariates (Spec A):\n")
##
## Calibration test — Selected covariates (Spec A):
print(test_calibration(cf))
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 0.99996 0.23897 4.1845 1.531e-05 ***
## differential.forest.prediction -419.90640 12.83985 -32.7034 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
cat("\nCalibration test — Observable covariates (Spec C):\n")
##
## Calibration test — Observable covariates (Spec C):
print(test_calibration(cf_phys_obs))
##
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
##
## Estimate Std. Error t value Pr(>t)
## mean.forest.prediction 0.60326 0.30567 1.9736 0.02433 *
## differential.forest.prediction -385.32525 14.41575 -26.7295 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare CATE distributions
par(mfrow = c(1, 3))
d_full <- density(cate_full)
d_sel <- density(oob_cate_phys)
cate_obs_oob <- predict(cf_phys_obs)$predictions
d_obs <- density(cate_obs_oob)
xlims <- range(c(d_full$x, d_sel$x, d_obs$x))
plot(d_full, main = "CATEs: Full Covariates (19 vars)",
xlab = "Treatment Effect", xlim = xlims)
abline(v = c(0, mean(cate_full)), col = c("blue", "red"), lty = 3)
plot(d_sel, main = paste0("CATEs: Spec A Selected (", length(selected_phys), " vars)"),
xlab = "Treatment Effect", xlim = xlims)
abline(v = c(0, mean(oob_cate_phys)), col = c("blue", "red"), lty = 3)
plot(d_obs, main = paste0("CATEs: Spec C Observable (", length(selected_phys_obs), " vars)"),
xlab = "Treatment Effect", xlim = xlims)
abline(v = c(0, mean(cate_obs_oob)), col = c("blue", "red"), lty = 3)
par(mfrow = c(1, 1))
comparison_robustness <- data.frame(
Specification = c("Full covariates (baseline)",
"Spec A: Selected + orthogonalized",
"Spec C: Observable + orthogonalized"),
ATE = round(c(ate_full[1], ate_all[1], ate_all_obs[1]), 4),
SE = round(c(ate_full[2], ate_all[2], ate_all_obs[2]), 4),
CATE_SD = round(c(sd(cate_full), sd(oob_cate_phys), sd(cate_obs_oob)), 4),
CATE_Range = c(
paste0("[", round(min(cate_full), 4), ", ", round(max(cate_full), 4), "]"),
paste0("[", round(min(oob_cate_phys), 4), ", ", round(max(oob_cate_phys), 4), "]"),
paste0("[", round(min(cate_obs_oob), 4), ", ", round(max(cate_obs_oob), 4), "]")
),
N_vars = c(ncol(X), length(selected_phys), length(selected_phys_obs))
)
knitr::kable(comparison_robustness,
caption = "Comparison: Full vs Selected vs Observable Covariates (Physical/Sexual Violence)")
| Specification | ATE | SE | CATE_SD | CATE_Range | N_vars |
|---|---|---|---|---|---|
| Full covariates (baseline) | -0.0583 | 0.0238 | 0.0173 | [-0.0916, -0.0253] | 19 |
| Spec A: Selected + orthogonalized | -0.0577 | 0.0239 | 0.0009 | [-0.0598, -0.0533] | 10 |
| Spec C: Observable + orthogonalized | -0.0487 | 0.0252 | 0.0010 | [-0.0557, -0.0474] | 11 |
The key things to look for: (1) ATEs should be similar across specifications — if they diverge substantially, one model may be misspecified. (2) The refined pipeline should ideally show a similar or tighter CATE distribution with better calibration test results, confirming that pre-selection and orthogonalization helped rather than hurt.
The default threshold for variable selection is 0.2 × mean importance. Here we test two alternative thresholds — a more permissive one (0.1×, keeps more variables) and a stricter one (0.5×, keeps fewer) — to check whether results are sensitive to this choice.
set.seed(100241)
thresholds <- c(0.1, 0.2, 0.5)
sensitivity_results <- data.frame()
for (thr in thresholds) {
sel <- which(Y_vi_phys / mean(Y_vi_phys) > thr)
n_sel <- length(sel)
cf_sens <- causal_forest(X[, sel], Y, W,
W.hat = W_hat,
Y.hat = Y_hat_phys,
num.trees = 4000,
tune.parameters = "all")
ate_sens <- average_treatment_effect(cf_sens, target.sample = "all")
cate_sens <- predict(cf_sens, X[, sel])$predictions
cal_sens <- test_calibration(cf_sens)
sensitivity_results <- rbind(sensitivity_results, data.frame(
Threshold = paste0(thr, "x"),
N_vars = n_sel,
Variables = paste(names(X)[sel], collapse = ", "),
ATE = round(ate_sens[1], 4),
SE = round(ate_sens[2], 4),
CATE_SD = round(sd(cate_sens), 4),
Cal_mean_p = round(cal_sens[1, 4], 4),
Cal_diff_p = round(cal_sens[2, 4], 4)
))
}
knitr::kable(sensitivity_results %>% select(-Variables),
caption = "Sensitivity to Variable Selection Threshold (Physical/Sexual Violence)",
col.names = c("Threshold", "# Vars", "ATE", "SE", "CATE SD",
"Calibration (mean) p", "Calibration (diff) p"))
| Threshold | # Vars | ATE | SE | CATE SD | Calibration (mean) p | Calibration (diff) p | |
|---|---|---|---|---|---|---|---|
| estimate | 0.1x | 14 | -0.0578 | 0.0239 | 5e-04 | 0.0025 | 1 |
| estimate1 | 0.2x | 10 | -0.0577 | 0.0239 | 0e+00 | 0.0004 | 1 |
| estimate2 | 0.5x | 6 | -0.0577 | 0.0239 | 0e+00 | 0.0000 | 1 |
cat("Variables selected at each threshold:\n\n")
## Variables selected at each threshold:
for (i in 1:nrow(sensitivity_results)) {
cat(sensitivity_results$Threshold[i], "(", sensitivity_results$N_vars[i], "vars):",
sensitivity_results$Variables[i], "\n\n")
}
## 0.1x ( 14 vars): bl_age, bl_edu_sec, bl_married, bl_dempl, bl_partn_sedu, bl_partn_age, bl_nchildren0t5, bl_nchildren6t14, bl_asset_index, norooms, bl_carchi, bl_physical, bl_emotional, bl_control
##
## 0.2x ( 10 vars): bl_age, bl_married, bl_partn_age, bl_nchildren0t5, bl_nchildren6t14, bl_asset_index, norooms, bl_physical, bl_emotional, bl_control
##
## 0.5x ( 6 vars): bl_age, bl_partn_age, bl_asset_index, bl_physical, bl_emotional, bl_control
If the ATE is stable across thresholds but the calibration test improves at one particular threshold, that gives guidance on the optimal level of pre-selection. Large swings in ATE would suggest the results are sensitive and should be interpreted cautiously.
The average_treatment_effect function provides
asymptotic standard errors. Here we complement these with bootstrap
confidence intervals by re-estimating the causal forest on B bootstrap
samples and collecting the ATE distribution.
set.seed(12345)
B <- 200 # number of bootstrap replications
boot_ate_phys <- numeric(B)
boot_ate_ctrl <- numeric(B)
boot_ate_emot <- numeric(B)
n <- nrow(cf_data) # re-stated explicitly; cf_data is unchanged from cv-setup
for (b in 1:B) {
idx <- sample(1:n, n, replace = TRUE)
X_b <- X[idx, ]
Y_b <- Y[idx]
W_b <- W[idx]
Y_ctrl_b <- Y_ctrl[idx]
Y_emot_b <- Y_emot[idx]
W_hat_b <- W_hat[idx]
Y_hat_phys_b <- Y_hat_phys[idx]
Y_hat_ctrl_b <- Y_hat_ctrl[idx]
Y_hat_emot_b <- Y_hat_emot[idx]
cf_b_phys <- causal_forest(X_b[, selected_phys], Y_b, W_b,
W.hat = W_hat_b, Y.hat = Y_hat_phys_b,
num.trees = 2000, tune.parameters = "none")
boot_ate_phys[b] <- average_treatment_effect(cf_b_phys, target.sample = "all")[1]
cf_b_ctrl <- causal_forest(X_b[, selected_ctrl], Y_ctrl_b, W_b,
W.hat = W_hat_b, Y.hat = Y_hat_ctrl_b,
num.trees = 2000, tune.parameters = "none")
boot_ate_ctrl[b] <- average_treatment_effect(cf_b_ctrl, target.sample = "all")[1]
cf_b_emot <- causal_forest(X_b[, selected_emot], Y_emot_b, W_b,
W.hat = W_hat_b, Y.hat = Y_hat_emot_b,
num.trees = 2000, tune.parameters = "none")
boot_ate_emot[b] <- average_treatment_effect(cf_b_emot, target.sample = "all")[1]
}
# Summary table
boot_table <- data.frame(
Outcome = c("Physical/Sexual", "Controlling", "Emotional"),
Point_ATE = round(c(ate_all[1], ate_ctrl[1], ate_emot[1]), 4),
Asymptotic_SE = round(c(ate_all[2], ate_ctrl[2], ate_emot[2]), 4),
Boot_SE = round(c(sd(boot_ate_phys), sd(boot_ate_ctrl), sd(boot_ate_emot)), 4),
Boot_CI_lower = round(c(
quantile(boot_ate_phys, 0.025),
quantile(boot_ate_ctrl, 0.025),
quantile(boot_ate_emot, 0.025)), 4),
Boot_CI_upper = round(c(
quantile(boot_ate_phys, 0.975),
quantile(boot_ate_ctrl, 0.975),
quantile(boot_ate_emot, 0.975)), 4)
)
knitr::kable(boot_table,
col.names = c("Outcome", "Point ATE", "Asymptotic SE", "Bootstrap SE",
"Boot 2.5%", "Boot 97.5%"),
caption = "Bootstrap vs Asymptotic ATE Inference (B = 200)")
| Outcome | Point ATE | Asymptotic SE | Bootstrap SE | Boot 2.5% | Boot 97.5% |
|---|---|---|---|---|---|
| Physical/Sexual | -0.0577 | 0.0239 | 0.0238 | -0.0998 | -0.0065 |
| Controlling | -0.0697 | 0.0293 | 0.0278 | -0.1258 | -0.0229 |
| Emotional | -0.0464 | 0.0287 | 0.0276 | -0.0972 | 0.0081 |
par(mfrow = c(1, 3))
hist(boot_ate_phys, breaks = 30, main = "Bootstrap ATEs: Physical/Sexual",
xlab = "ATE", col = "steelblue", border = "white")
abline(v = ate_all[1], col = "red", lwd = 2)
abline(v = 0, col = "blue", lty = 3)
hist(boot_ate_ctrl, breaks = 30, main = "Bootstrap ATEs: Controlling",
xlab = "ATE", col = "steelblue", border = "white")
abline(v = ate_ctrl[1], col = "red", lwd = 2)
abline(v = 0, col = "blue", lty = 3)
hist(boot_ate_emot, breaks = 30, main = "Bootstrap ATEs: Emotional",
xlab = "ATE", col = "steelblue", border = "white")
abline(v = ate_emot[1], col = "red", lwd = 2)
abline(v = 0, col = "blue", lty = 3)
par(mfrow = c(1, 1))
If the bootstrap SEs are similar to the asymptotic SEs, this confirms that the asymptotic theory works well in this sample. If the bootstrap CIs are wider, it suggests the asymptotic SEs may be too optimistic. The red line shows the point estimate and the blue dashed line marks zero — if the bulk of the bootstrap distribution falls to one side of zero, the treatment effect is robust.
This analysis follows the recommended pipeline from Athey & Wager:
Step 1 — Propensity Forest: Confirms randomization by showing no covariate strongly predicts treatment assignment. Generates W.hat for orthogonalization.
Step 2 — Outcome Regression Forests: Identifies which covariates are strong predictors of each IPV outcome. Generates Y.hat for orthogonalization and selects variables above the 0.2 × mean importance threshold.
Step 3 — Refined Causal Forests: Uses selected covariates plus W.hat and Y.hat for orthogonalization, improving the signal-to-noise ratio for treatment effect heterogeneity detection.
| Specification | Description | Covariates Excluded |
|---|---|---|
| A | Full covariates | None |
| B | Policy-relevant (keep baseline same outcome) | Cross-outcome baseline IPV |
| C | Purely observable targeting | All baseline IPV (bl_physical, bl_emotional, bl_control) |
Specification C represents the most policy-realistic scenario: policymakers only have access to observable demographic and socioeconomic characteristics for targeting, with no confidential IPV survey data at all.
Married women seem to benefit less than women in informal unions — possibly reflecting differences in bargaining dynamics within relationships.
Indigenous women tend to benefit more, especially for controlling behaviors — a potentially important finding for targeting.
Having more young children (0-5) dampens the treatment effect across multiple outcomes.
Province effects (Carchi vs Sucumbíos) may moderate treatment effectiveness.
Five robustness checks were conducted:
Overlap check (5.1): Propensity scores should be tightly clustered for both groups, confirming the RCT design and validating the use of W.hat orthogonalization.
Placebo test (5.2): Shuffling treatment labels should collapse ATEs and CATEs to near zero with no significant calibration, confirming results are not artefacts.
Full vs selected covariates (5.3): Comparing the refined pipeline against a baseline forest with all covariates. Similar ATEs increase confidence; better calibration from the refined pipeline validates the two-step approach.
Threshold sensitivity (5.4): ATE stability across selection thresholds (0.1×, 0.2×, 0.5×) confirms results are not driven by an arbitrary variable selection cutoff.
Bootstrap CIs (5.5): 200-replicate bootstrap provides non-parametric confidence intervals for comparison against asymptotic SEs.