Part 1: Data Preparation & Replication

Load Libraries

library(grf)
library(haven)
library(dplyr)
library(ggplot2)
library(corrplot)
library(caret)
library(sandwich)
library(lmtest)
library(margins)
library(Hmisc)
library(tidyr)

Load Data

# Data: Hidrobo et al. (2016) replication archive, Harvard Dataverse
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/YBGPAP
# Place DV_baseline_followup.dta at Data-and-dofiles/Data/ relative to this file.
df_raw <- read_dta("Data-and-dofiles/Data/DV_baseline_followup.dta")
dim(df_raw)

## [1] 2357  229

Replicate Variable Construction from the Stata Do-File

The original do-file constructs treatment indicators, IPV outcomes, control variables, and sample eligibility flags. We replicate this step by step.

df <- df_raw

# --- Treatment indicators ---
df$p_treat <- as.numeric(df$bl_p_treat == 1)
df$Treatment <- as.numeric(df$bl_modalidad)

df$treat_food    <- as.numeric(df$Treatment == 2)
df$treat_cash    <- as.numeric(df$Treatment == 3)
df$treat_voucher <- as.numeric(df$Treatment == 4)

# --- Relationship indicator ---
df$bl_inrelat <- as.numeric(df$bl_b06 == 2 | df$bl_b06 == 3)
df$el_inrelat <- as.numeric(df$el_b06 == 2 | df$el_b06 == 3)

# --- Province ---
df$bl_carchi <- as.numeric(df$bl_a02 == 1)

# --- Endline IPV outcomes (last 6 months) ---
ipv_el_vars <- c("el_t10b","el_t11b","el_t12b","el_t13b","el_t14b",
                  "el_t15b","el_t16b","el_t17b","el_t18b","el_t19b",
                  "el_t20b","el_t21b","el_t22b","el_t23b",
                  "el_t24b","el_t25b","el_t26b","el_t27b","el_t28b")

ipv_el_life <- c("el_t10","el_t11","el_t12","el_t13","el_t14",
                  "el_t15","el_t16","el_t17","el_t18","el_t19",
                  "el_t20","el_t21","el_t22","el_t23",
                  "el_t24","el_t25","el_t26","el_t27","el_t28")

for (i in seq_along(ipv_el_vars)) {
  bvar <- ipv_el_vars[i]
  lvar <- ipv_el_life[i]
  if (bvar %in% names(df) & lvar %in% names(df)) {
    df[[bvar]] <- ifelse(is.na(df[[bvar]]) & df[[lvar]] == 0, 0, df[[bvar]])
    df[[bvar]] <- as.numeric(df[[bvar]] == 1)
  }
}

# Same for baseline
ipv_bl_vars <- c("bl_t10b","bl_t11b","bl_t12b","bl_t13b","bl_t14b",
                  "bl_t15b","bl_t16b","bl_t17b","bl_t18b","bl_t19b",
                  "bl_t20b","bl_t21b","bl_t22b","bl_t23b")

ipv_bl_life <- c("bl_t10","bl_t11","bl_t12","bl_t13","bl_t14",
                  "bl_t15","bl_t16","bl_t17","bl_t18","bl_t19",
                  "bl_t20","bl_t21","bl_t22","bl_t23")

for (i in seq_along(ipv_bl_vars)) {
  bvar <- ipv_bl_vars[i]
  lvar <- ipv_bl_life[i]
  if (bvar %in% names(df) & lvar %in% names(df)) {
    df[[bvar]] <- ifelse(is.na(df[[bvar]]) & df[[lvar]] == 0, 0, df[[bvar]])
    df[[bvar]] <- as.numeric(df[[bvar]] == 1)
  }
}

# --- Composite IPV outcomes ---

# Endline
df$el_control   <- as.numeric(df$el_t10b == 1 | df$el_t11b == 1 | 
                                df$el_t25b == 1 | df$el_t26b == 1)
df$el_emotional <- as.numeric(df$el_t12b == 1 | df$el_t13b == 1 | df$el_t14b == 1 | 
                                df$el_t24b == 1 | df$el_t27b == 1 | df$el_t28b == 1)
df$el_physical  <- as.numeric(df$el_t15b == 1 | df$el_t16b == 1 | df$el_t17b == 1 | 
                                df$el_t18b == 1 | df$el_t19b == 1 | df$el_t20b == 1 | 
                                df$el_t21b == 1 | df$el_t22b == 1 | df$el_t23b == 1)

# Baseline
df$bl_control   <- as.numeric(df$bl_t10b == 1 | df$bl_t11b == 1)
df$bl_emotional <- as.numeric(df$bl_t12b == 1 | df$bl_t13b == 1 | df$bl_t14b == 1)
df$bl_physical  <- as.numeric(df$bl_t15b == 1 | df$bl_t16b == 1 | df$bl_t17b == 1 | 
                                df$bl_t18b == 1 | df$bl_t19b == 1 | df$bl_t20b == 1 | 
                                df$bl_t21b == 1 | df$bl_t22b == 1 | df$bl_t23b == 1)

# Baseline lifetime violence
df$bl_lphysical <- as.numeric(df$bl_t15 == 1 | df$bl_t16 == 1 | df$bl_t17 == 1 | 
                                df$bl_t18 == 1 | df$bl_t19 == 1 | df$bl_t20 == 1 | 
                                df$bl_t21 == 1)
df$bl_lsexual   <- as.numeric(df$bl_t22 == 1 | df$bl_t23 == 1)
df$bl_lphysex   <- as.numeric(df$bl_lphysical == 1 | df$bl_lsexual == 1)

# --- Control variables ---
df$bl_edu_sec  <- as.numeric(df$bl_b09 > 3)
df$bl_age      <- as.numeric(df$bl_b04)
df$bl_colom    <- as.numeric(df$bl_b08 == 2)
df$bl_married  <- as.numeric(df$bl_b06 == 3)
df$bl_indig    <- as.numeric(df$bl_b07 == 2)
df$bl_black    <- as.numeric(df$bl_b07 == 3 | df$bl_b07 == 5)
df$bl_head     <- as.numeric(df$bl_b03 == 1)
df$bl_dempl    <- as.numeric(df$bl_d05 != 5)

df$ownhouse <- as.numeric(
  (df$bl_e12 == 1 & df$bl_b03 == 1) | (df$bl_e12 == 2 & df$bl_b03 == 2)
)

# --- Partner variables ---
df$bl_partn_edu <- ifelse(df$bl_b03 == 2, df$bl_mhead_edu, df$bl_mspouse_edu)
df$bl_partn_sedu <- as.numeric(df$bl_partn_edu >= 4 & df$bl_partn_edu < 9)
df$bl_partn_age <- ifelse(df$bl_b03 == 2, df$bl_mhead_age, df$bl_mspouse_age)

# --- Household controls ---
df$bl_nchildren0t5  <- as.numeric(df$bl_nchildren0t5)
df$bl_nchildren6t14 <- as.numeric(df$bl_nchildren6t14)

df$bl_asset_cat <- as.numeric(cut(df$bl_asset_index, 
                                   breaks = quantile(df$bl_asset_index, probs = 0:4/4, na.rm = TRUE),
                                   include.lowest = TRUE, labels = 1:4))
df$wealth2 <- as.numeric(df$bl_asset_cat == 2)
df$wealth3 <- as.numeric(df$bl_asset_cat == 3)
df$wealth4 <- as.numeric(df$bl_asset_cat == 4)

df$norooms <- as.numeric(df$bl_e06 == 0)

# --- Consumption ---
df$bl_totcons_pc <- as.numeric(df$bl_fneh_totcons_pc) * (365/12)
df$bl_nfcons_pc  <- as.numeric(df$bl_nfcons_pc) * (365/12)

# --- Eligibility & analysis sample ---
df$eligible <- as.numeric(
  df$bl_inrelat == 1 & 
  as.numeric(df$bl_b04) < 70 & 
  df$bl_tnohaymujer == 1 & 
  (df$bl_b03 == 1 | df$bl_b03 == 2)
)

df$foranalysis <- as.numeric(
  df$bl_tb == 1 & df$el_tb == 1 & df$diffid == 0 & df$eligible == 1
)

cat("Full dataset:", nrow(df), "observations\n")

## Full dataset: 2357 observations

cat("Analysis sample (foranalysis == 1):", sum(df$foranalysis == 1, na.rm = TRUE), "observations\n")

## Analysis sample (foranalysis == 1): 1226 observations

Restrict to Analysis Sample

dfa <- df %>% filter(foranalysis == 1)
cat("Analysis sample:", nrow(dfa), "observations\n")

## Analysis sample: 1226 observations

Part 2: Replication of Main Results

Table 1: Balance Test (Pooled Treatment vs Control)

balance_vars <- c("bl_head", "bl_colom", "bl_age", "bl_edu_sec", "bl_married",
                   "bl_indig", "bl_black", "bl_dempl", "ownhouse",
                   "bl_partn_sedu", "bl_partn_age",
                   "bl_nchildren0t5", "bl_nchildren6t14", "bl_totcons_pc", "norooms",
                   "bl_lphysex", "bl_control", "bl_emotional", "bl_physical", "bl_asset_index", "bl_carchi")

balance_results <- data.frame(
  Variable = balance_vars,
  Mean_Control = NA, Mean_Treatment = NA, P_value = NA
)

for (i in seq_along(balance_vars)) {
  v <- balance_vars[i]
  if (v %in% names(dfa)) {
    ctrl <- dfa[[v]][dfa$p_treat == 0]
    trt  <- dfa[[v]][dfa$p_treat == 1]
    balance_results$Mean_Control[i]   <- round(mean(ctrl, na.rm = TRUE), 3)
    balance_results$Mean_Treatment[i] <- round(mean(trt, na.rm = TRUE), 3)
    tt <- try(t.test(ctrl, trt), silent = TRUE)
    if (!inherits(tt, "try-error")) {
      balance_results$P_value[i] <- round(tt$p.value, 3)
    }
  }
}

print(balance_results)

##            Variable Mean_Control Mean_Treatment P_value
## 1           bl_head        0.032          0.018   0.191
## 2          bl_colom        0.394          0.331   0.042
## 3            bl_age       35.293         34.664   0.409
## 4        bl_edu_sec        0.380          0.387   0.812
## 5        bl_married        0.417          0.426   0.792
## 6          bl_indig        0.035          0.043   0.488
## 7          bl_black        0.061          0.070   0.540
## 8          bl_dempl        0.307          0.326   0.530
## 9          ownhouse        0.041          0.050   0.469
## 10    bl_partn_sedu        0.357          0.393   0.237
## 11     bl_partn_age       39.203         38.390   0.319
## 12  bl_nchildren0t5        0.722          0.755   0.523
## 13 bl_nchildren6t14        1.023          0.873   0.040
## 14    bl_totcons_pc      107.324        106.311   0.821
## 15          norooms        0.128          0.153   0.237
## 16       bl_lphysex        0.333          0.351   0.563
## 17       bl_control        0.171          0.167   0.862
## 18     bl_emotional        0.243          0.267   0.398
## 19      bl_physical        0.125          0.176   0.020
## 20   bl_asset_index        0.685          0.338   0.004
## 21        bl_carchi        0.330          0.397   0.027

Table 2: Impact of Pooled Treatment on IPV (LPM)

We replicate the probit regressions from Table 2 using LPM (linear probability models) as in the robustness appendix, since marginal effects from probit in R give equivalent results.

controls_formula <- "bl_head + bl_colom + bl_married + bl_indig + bl_black + 
                      bl_age + bl_edu_sec + bl_dempl + ownhouse + 
                      bl_partn_sedu + bl_partn_age + 
                      bl_nchildren0t5 + bl_nchildren6t14 + 
                      wealth2 + wealth3 + wealth4 + bl_carchi"

outcomes <- c("el_control", "el_emotional", "el_physical")
bl_outcomes <- c("bl_control", "bl_emotional", "bl_physical")

cat("=== Table 2: Impact of Pooled Treatment on IPV (LPM) ===\n\n")

## === Table 2: Impact of Pooled Treatment on IPV (LPM) ===

for (i in 1:3) {
  outcome <- outcomes[i]
  bl_outcome <- bl_outcomes[i]
  
  cat("--- Outcome:", outcome, "---\n")
  
  ctrl_mean <- mean(dfa[[outcome]][dfa$p_treat == 0], na.rm = TRUE)
  cat("Endline control mean:", round(ctrl_mean, 3), "\n")
  
  f1 <- as.formula(paste(outcome, "~ p_treat"))
  m1 <- lm(f1, data = dfa)
  
  f2 <- as.formula(paste(outcome, "~ p_treat +", bl_outcome, "+ bl_carchi"))
  m2 <- lm(f2, data = dfa)
  
  f3 <- as.formula(paste(outcome, "~ p_treat +", bl_outcome, "+", controls_formula))
  m3 <- lm(f3, data = dfa)
  
  se1 <- sqrt(diag(vcovCL(m1, cluster = dfa$bl_a05)))
  se2 <- sqrt(diag(vcovCL(m2, cluster = dfa$bl_a05)))
  se3 <- sqrt(diag(vcovCL(m3, cluster = dfa$bl_a05)))
  
  cat("  No controls:       coef =", round(coef(m1)["p_treat"], 3), 
      " SE =", round(se1["p_treat"], 3), "\n")
  cat("  Basic controls:    coef =", round(coef(m2)["p_treat"], 3), 
      " SE =", round(se2["p_treat"], 3), "\n")
  cat("  Extended controls: coef =", round(coef(m3)["p_treat"], 3), 
      " SE =", round(se3["p_treat"], 3), "\n\n")
}

## --- Outcome: el_control ---
## Endline control mean: 0.362 
##   No controls:       coef = -0.072  SE = 0.034 
##   Basic controls:    coef = -0.066  SE = 0.033 
##   Extended controls: coef = -0.068  SE = 0.034 
## 
## --- Outcome: el_emotional ---
## Endline control mean: 0.351 
##   No controls:       coef = -0.04  SE = 0.036 
##   Basic controls:    coef = -0.043  SE = 0.034 
##   Extended controls: coef = -0.04  SE = 0.034 
## 
## --- Outcome: el_physical ---
## Endline control mean: 0.197 
##   No controls:       coef = -0.044  SE = 0.035 
##   Basic controls:    coef = -0.063  SE = 0.033 
##   Extended controls: coef = -0.06  SE = 0.033

Part 3: Two-Step Causal Forest Pipeline (Athey/Wager)

Prepare Data

cf_vars <- c("bl_head", "bl_colom", "bl_age", "bl_edu_sec", "bl_married",
              "bl_indig", "bl_black", "bl_dempl", "ownhouse",
              "bl_partn_sedu", "bl_partn_age",
              "bl_nchildren0t5", "bl_nchildren6t14",
              "bl_asset_index", "norooms", "bl_carchi",
              "bl_physical", "bl_emotional", "bl_control")

cf_data <- dfa %>%
  select(all_of(c("el_physical", "el_emotional", "el_control", 
                    "p_treat", "bl_a05", cf_vars))) %>%
  na.omit()

# Policy-relevant covariates: exclude cross-outcome baseline IPV variables
# (e.g., baseline emotional/controlling abuse is not actionable for targeting physical violence)
cf_vars_policy_phys <- setdiff(cf_vars, c("bl_emotional", "bl_control"))
cf_vars_policy_ctrl <- setdiff(cf_vars, c("bl_physical", "bl_emotional"))
cf_vars_policy_emot <- setdiff(cf_vars, c("bl_physical", "bl_control"))

# Purely observable covariates: exclude ALL baseline IPV variables
# (policymakers would not have access to baseline IPV survey responses)
cf_vars_policy_obs <- setdiff(cf_vars, c("bl_physical", "bl_emotional", "bl_control"))

cat("Observations for causal forest:", nrow(cf_data), "\n")

## Observations for causal forest: 1226

cat("Spec A covariates:", length(cf_vars), "\n")

## Spec A covariates: 19

cat("Spec C covariates (purely observable):", length(cf_vars_policy_obs), "\n")

## Spec C covariates (purely observable): 16

Correlation Plot

corrplot(cor(cf_data %>% select(all_of(cf_vars)), use = "complete.obs"), 
         type = "upper", tl.col = "black", tl.cex = 0.8)

K-Fold Cross-Validation Setup

With only ~1,100 observations, a classic 70/30 train/test split wastes valuable data. We use 5-fold cross-validation to generate out-of-fold CATE predictions while maximizing the training set size in each fold. The full data is used for point estimation (ATE, BLP, calibration), while k-fold out-of-fold predictions are used for evaluation (CATE distribution, quartile analysis).

K <- 5
set.seed(1234)
folds <- createFolds(cf_data$el_physical, k = K, list = TRUE)

# Full data for estimation
X      <- cf_data %>% select(all_of(cf_vars))
Y      <- cf_data$el_physical
W      <- cf_data$p_treat
Y_ctrl <- cf_data$el_control
Y_emot <- cf_data$el_emotional
n      <- nrow(cf_data)

cat("Full dataset:", n, "observations\n")

## Full dataset: 1226 observations

cat("K-fold CV with K =", K, "\n")

## K-fold CV with K = 5

cat("Fold sizes:", sapply(folds, length), "\n")

## Fold sizes: 245 245 245 246 245

Step 1: Propensity Forest — Randomization Check

Since this is an RCT, treatment assignment should be independent of covariates. We verify this by fitting a regression forest to predict W from X. If randomization worked, no covariate should have high predictive importance, and estimated propensity scores should be approximately constant.

set.seed(100241)

# Fit regression forest to predict treatment assignment
rf_W <- regression_forest(X, W, tune.parameters = "all")
W_hat <- predict(rf_W)$predictions

# Variable importance for treatment prediction
W_varimp <- variable_importance(rf_W)
W_vi_df <- data.frame(variable = names(X), importance = as.numeric(W_varimp))
W_vi_df <- W_vi_df %>% arrange(desc(importance))

cat("Propensity score summary (should be ~constant around", round(mean(W), 3), "):\n")

## Propensity score summary (should be ~constant around 0.719 ):

summary(W_hat)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.6114  0.6850  0.7161  0.7185  0.7536  0.8387

cat("\nSD of estimated propensity scores:", round(sd(W_hat), 4))

## 
## SD of estimated propensity scores: 0.0452

cat("\nActual treatment share:", round(mean(W), 3), "\n")

## 
## Actual treatment share: 0.719

# Plot
ggplot(W_vi_df, aes(x = reorder(variable, importance), y = importance)) +
  geom_col(fill = "coral", alpha = 0.8) +
  coord_flip() +
  labs(title = "Variable Importance for Predicting Treatment Assignment",
       subtitle = "Randomization check: importance should be uniformly low",
       x = "", y = "Importance") +
  theme_minimal()

The propensity scores should cluster tightly around the actual treatment share with low variance. The variable importance bars should be roughly flat — if any covariate strongly predicted treatment, it would raise concerns about the randomization. This check complements the balance test from Part 2.

Step 2: Outcome Regression Forests & Variable Selection

We fit regression forests to predict each IPV outcome from the covariates. This serves two purposes: (1) generating outcome predictions (Y.hat) for orthogonalization in the causal forest, and (2) identifying which covariates are strong predictors of the outcome level — these are the most promising candidates for detecting heterogeneity.

set.seed(100241)

# Outcome vectors Y_ctrl, Y_emot already defined in cv-setup (full data)

# Physical/Sexual Violence
rf_Y_phys <- regression_forest(X, Y, tune.parameters = "all")
Y_hat_phys <- predict(rf_Y_phys)$predictions
Y_vi_phys <- variable_importance(rf_Y_phys)

# Controlling Behaviors
rf_Y_ctrl <- regression_forest(X, Y_ctrl, tune.parameters = "all")
Y_hat_ctrl <- predict(rf_Y_ctrl)$predictions
Y_vi_ctrl <- variable_importance(rf_Y_ctrl)

# Emotional Violence
rf_Y_emot <- regression_forest(X, Y_emot, tune.parameters = "all")
Y_hat_emot <- predict(rf_Y_emot)$predictions
Y_vi_emot <- variable_importance(rf_Y_emot)

vi_outcome_df <- data.frame(
  variable = names(X),
  Physical = round(as.numeric(Y_vi_phys), 4),
  Controlling = round(as.numeric(Y_vi_ctrl), 4),
  Emotional = round(as.numeric(Y_vi_emot), 4)
)
vi_outcome_df <- vi_outcome_df %>% arrange(desc(Physical))

knitr::kable(vi_outcome_df, caption = "Outcome Regression Forest: Variable Importance for Predicting IPV Levels")

Outcome Regression Forest: Variable Importance for Predicting IPV Levels
variable	Physical	Controlling	Emotional
bl_physical	0.4980	0.1493	0.1989
bl_control	0.1432	0.3389	0.1726
bl_emotional	0.0894	0.2445	0.4032
bl_asset_index	0.0805	0.0624	0.0617
bl_partn_age	0.0486	0.0423	0.0355
bl_age	0.0484	0.0780	0.0328
norooms	0.0149	0.0045	0.0031
bl_married	0.0143	0.0163	0.0178
bl_nchildren6t14	0.0131	0.0120	0.0132
bl_nchildren0t5	0.0127	0.0122	0.0164
bl_partn_sedu	0.0079	0.0047	0.0059
bl_dempl	0.0071	0.0076	0.0040
bl_carchi	0.0070	0.0078	0.0149
bl_edu_sec	0.0059	0.0060	0.0072
bl_colom	0.0050	0.0051	0.0053
bl_black	0.0017	0.0024	0.0027
ownhouse	0.0015	0.0025	0.0028
bl_indig	0.0006	0.0023	0.0018
bl_head	0.0002	0.0011	0.0004

# Athey/Wager threshold: keep variables with importance > 0.2 * mean
threshold <- 0.2

selected_phys <- which(Y_vi_phys / mean(Y_vi_phys) > threshold)
selected_ctrl <- which(Y_vi_ctrl / mean(Y_vi_ctrl) > threshold)
selected_emot <- which(Y_vi_emot / mean(Y_vi_emot) > threshold)

cat("Selected variables for Physical/Sexual Violence (", length(selected_phys), "of", ncol(X), "):\n")

## Selected variables for Physical/Sexual Violence ( 10 of 19 ):

print(names(X)[selected_phys])

##  [1] "bl_age"           "bl_married"       "bl_partn_age"     "bl_nchildren0t5" 
##  [5] "bl_nchildren6t14" "bl_asset_index"   "norooms"          "bl_physical"     
##  [9] "bl_emotional"     "bl_control"

cat("\nSelected variables for Controlling Behaviors (", length(selected_ctrl), "of", ncol(X), "):\n")

## 
## Selected variables for Controlling Behaviors ( 9 of 19 ):

print(names(X)[selected_ctrl])

## [1] "bl_age"           "bl_married"       "bl_partn_age"     "bl_nchildren0t5" 
## [5] "bl_nchildren6t14" "bl_asset_index"   "bl_physical"      "bl_emotional"    
## [9] "bl_control"

cat("\nSelected variables for Emotional Violence (", length(selected_emot), "of", ncol(X), "):\n")

## 
## Selected variables for Emotional Violence ( 10 of 19 ):

print(names(X)[selected_emot])

##  [1] "bl_age"           "bl_married"       "bl_partn_age"     "bl_nchildren0t5" 
##  [5] "bl_nchildren6t14" "bl_asset_index"   "bl_carchi"        "bl_physical"     
##  [9] "bl_emotional"     "bl_control"

Policy-Relevant Variable Selection (Specification B)

For policy purposes, we create an alternative specification that excludes cross-outcome baseline IPV variables. When targeting a program to reduce physical violence, a policymaker would not typically know a woman’s baseline emotional or controlling abuse status — these are measured through confidential surveys. Specification B uses only covariates that would plausibly be available for targeting decisions.

# Policy-relevant covariate matrices
X_pol_phys <- X[, cf_vars_policy_phys]
X_pol_ctrl <- X[, cf_vars_policy_ctrl]
X_pol_emot <- X[, cf_vars_policy_emot]

# Specification C: purely observable covariates (same for all outcomes)
X_pol_obs <- X[, cf_vars_policy_obs]

# Outcome regression forests with policy-relevant covariates
rf_Y_phys_pol <- regression_forest(X_pol_phys, Y, tune.parameters = "all")
Y_hat_phys_pol <- predict(rf_Y_phys_pol)$predictions
Y_vi_phys_pol <- variable_importance(rf_Y_phys_pol)

rf_Y_ctrl_pol <- regression_forest(X_pol_ctrl, Y_ctrl, tune.parameters = "all")
Y_hat_ctrl_pol <- predict(rf_Y_ctrl_pol)$predictions
Y_vi_ctrl_pol <- variable_importance(rf_Y_ctrl_pol)

rf_Y_emot_pol <- regression_forest(X_pol_emot, Y_emot, tune.parameters = "all")
Y_hat_emot_pol <- predict(rf_Y_emot_pol)$predictions
Y_vi_emot_pol <- variable_importance(rf_Y_emot_pol)

# Variable selection for policy specification
selected_phys_pol <- which(Y_vi_phys_pol / mean(Y_vi_phys_pol) > threshold)
selected_ctrl_pol <- which(Y_vi_ctrl_pol / mean(Y_vi_ctrl_pol) > threshold)
selected_emot_pol <- which(Y_vi_emot_pol / mean(Y_vi_emot_pol) > threshold)

cat("=== Policy-Relevant Variable Selection (Specification B) ===")

## === Policy-Relevant Variable Selection (Specification B) ===

cat("\nPhysical/Sexual (", length(selected_phys_pol), "of", ncol(X_pol_phys), "):\n")

## 
## Physical/Sexual ( 8 of 17 ):

print(names(X_pol_phys)[selected_phys_pol])

## [1] "bl_age"           "bl_married"       "bl_partn_age"     "bl_nchildren0t5" 
## [5] "bl_nchildren6t14" "bl_asset_index"   "norooms"          "bl_physical"

cat("\nControlling (", length(selected_ctrl_pol), "of", ncol(X_pol_ctrl), "):\n")

## 
## Controlling ( 12 of 17 ):

print(names(X_pol_ctrl)[selected_ctrl_pol])

##  [1] "bl_colom"         "bl_age"           "bl_edu_sec"       "bl_married"      
##  [5] "bl_dempl"         "bl_partn_age"     "bl_nchildren0t5"  "bl_nchildren6t14"
##  [9] "bl_asset_index"   "norooms"          "bl_carchi"        "bl_control"

cat("\nEmotional (", length(selected_emot_pol), "of", ncol(X_pol_emot), "):\n")

## 
## Emotional ( 8 of 17 ):

print(names(X_pol_emot)[selected_emot_pol])

## [1] "bl_age"           "bl_married"       "bl_partn_age"     "bl_nchildren0t5" 
## [5] "bl_nchildren6t14" "bl_asset_index"   "bl_carchi"        "bl_emotional"

Purely Observable Variable Selection (Specification C)

Specification C excludes all baseline IPV variables (physical, emotional, controlling). This represents a targeting scenario where policymakers only have access to observable demographic and socioeconomic characteristics — no confidential IPV survey data at all.

# Outcome regression forests with purely observable covariates
rf_Y_phys_obs <- regression_forest(X_pol_obs, Y, tune.parameters = "all")
Y_hat_phys_obs <- predict(rf_Y_phys_obs)$predictions
Y_vi_phys_obs <- variable_importance(rf_Y_phys_obs)

rf_Y_ctrl_obs <- regression_forest(X_pol_obs, Y_ctrl, tune.parameters = "all")
Y_hat_ctrl_obs <- predict(rf_Y_ctrl_obs)$predictions
Y_vi_ctrl_obs <- variable_importance(rf_Y_ctrl_obs)

rf_Y_emot_obs <- regression_forest(X_pol_obs, Y_emot, tune.parameters = "all")
Y_hat_emot_obs <- predict(rf_Y_emot_obs)$predictions
Y_vi_emot_obs <- variable_importance(rf_Y_emot_obs)

# Variable selection for purely observable specification
selected_phys_obs <- which(Y_vi_phys_obs / mean(Y_vi_phys_obs) > threshold)
selected_ctrl_obs <- which(Y_vi_ctrl_obs / mean(Y_vi_ctrl_obs) > threshold)
selected_emot_obs <- which(Y_vi_emot_obs / mean(Y_vi_emot_obs) > threshold)

cat("=== Purely Observable Variable Selection (Specification C) ===")

## === Purely Observable Variable Selection (Specification C) ===

cat("\nPhysical/Sexual (", length(selected_phys_obs), "of", ncol(X_pol_obs), "):\n")

## 
## Physical/Sexual ( 11 of 16 ):

print(names(X_pol_obs)[selected_phys_obs])

##  [1] "bl_colom"         "bl_age"           "bl_married"       "bl_black"        
##  [5] "bl_dempl"         "bl_partn_sedu"    "bl_partn_age"     "bl_nchildren0t5" 
##  [9] "bl_nchildren6t14" "bl_asset_index"   "norooms"

cat("\nControlling (", length(selected_ctrl_obs), "of", ncol(X_pol_obs), "):\n")

## 
## Controlling ( 16 of 16 ):

print(names(X_pol_obs)[selected_ctrl_obs])

##  [1] "bl_head"          "bl_colom"         "bl_age"           "bl_edu_sec"      
##  [5] "bl_married"       "bl_indig"         "bl_black"         "bl_dempl"        
##  [9] "ownhouse"         "bl_partn_sedu"    "bl_partn_age"     "bl_nchildren0t5" 
## [13] "bl_nchildren6t14" "bl_asset_index"   "norooms"          "bl_carchi"

cat("\nEmotional (", length(selected_emot_obs), "of", ncol(X_pol_obs), "):\n")

## 
## Emotional ( 12 of 16 ):

print(names(X_pol_obs)[selected_emot_obs])

##  [1] "bl_colom"         "bl_age"           "bl_edu_sec"       "bl_married"      
##  [5] "bl_dempl"         "bl_partn_sedu"    "bl_partn_age"     "bl_nchildren0t5" 
##  [9] "bl_nchildren6t14" "bl_asset_index"   "norooms"          "bl_carchi"

vi_long <- vi_outcome_df %>%
  pivot_longer(cols = c(Physical, Controlling, Emotional),
               names_to = "Outcome", values_to = "Importance")

ggplot(vi_long, aes(x = reorder(variable, Importance), y = Importance, fill = Outcome)) +
  geom_col(position = "dodge", alpha = 0.8) +
  coord_flip() +
  labs(title = "Outcome Regression Forest: Variable Importance Across IPV Outcomes",
       subtitle = "Variables above the selection threshold are retained for the causal forest",
       x = "", y = "Importance") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

vi_phys_pol <- data.frame(
  variable = names(X_pol_phys),
  Importance = as.numeric(Y_vi_phys_pol),
  Outcome = "Physical"
)

vi_ctrl_pol <- data.frame(
  variable = names(X_pol_ctrl),
  Importance = as.numeric(Y_vi_ctrl_pol),
  Outcome = "Controlling"
)

vi_emot_pol <- data.frame(
  variable = names(X_pol_emot),
  Importance = as.numeric(Y_vi_emot_pol),
  Outcome = "Emotional"
)

vi_long_pol <- bind_rows(vi_phys_pol, vi_ctrl_pol, vi_emot_pol)

# Plot
ggplot(vi_long_pol,
       aes(x = reorder(variable, Importance), y = Importance, fill = Outcome)) +
  geom_col(position = "dodge", alpha = 0.8) +
  coord_flip() +
  labs(title = "Outcome Regression Forest: Variable Importance Across IPV Outcomes",
       subtitle = "Policy-Relevant Covariates (Specification B)",
       x = "", y = "Importance") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

vi_outcome_df_obs <- data.frame(
  variable = names(X_pol_obs),
  Physical = round(as.numeric(Y_vi_phys_obs), 4),
  Controlling = round(as.numeric(Y_vi_ctrl_obs), 4),
  Emotional = round(as.numeric(Y_vi_emot_obs), 4)
)
vi_outcome_df_obs <- vi_outcome_df_obs %>% arrange(desc(Physical))

knitr::kable(vi_outcome_df_obs, caption = "Outcome Regression Forest: Variable Importance — Purely Observable Covariates (Specification C)")

Outcome Regression Forest: Variable Importance — Purely Observable Covariates (Specification C)
variable	Physical	Controlling	Emotional
bl_asset_index	0.2745	0.0896	0.1058
bl_age	0.2264	0.1131	0.1074
bl_partn_age	0.2046	0.1060	0.1001
bl_nchildren0t5	0.0609	0.0738	0.1015
bl_nchildren6t14	0.0510	0.0714	0.0804
bl_married	0.0454	0.0785	0.0914
norooms	0.0434	0.0532	0.0609
bl_black	0.0188	0.0384	0.0016
bl_partn_sedu	0.0178	0.0519	0.0686
bl_dempl	0.0150	0.0611	0.0652
bl_colom	0.0126	0.0517	0.0624
bl_carchi	0.0118	0.0590	0.0860
bl_edu_sec	0.0102	0.0539	0.0671
ownhouse	0.0048	0.0349	0.0006
bl_indig	0.0020	0.0379	0.0007
bl_head	0.0008	0.0258	0.0002

vi_long_obs <- vi_outcome_df_obs %>%
  pivot_longer(cols = c(Physical, Controlling, Emotional),
               names_to = "Outcome", values_to = "Importance")

ggplot(vi_long_obs,
       aes(x = reorder(variable, Importance), y = Importance, fill = Outcome)) +
  geom_col(position = "dodge", alpha = 0.8) +
  coord_flip() +
  labs(title = "Outcome Regression Forest: Variable Importance Across IPV Outcomes",
       subtitle = "Purely Observable Covariates (Specification C)",
       x = "", y = "Importance") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set2")

The variable importance from the outcome regression forests tells us which covariates are strong predictors of IPV levels. Variables that survive the selection threshold are kept in the causal forest. Note that predicting outcome levels well is distinct from driving heterogeneity in treatment effects — but following Athey & Wager, pre-selecting on outcome prediction is a principled way to reduce noise in the causal forest when the covariate space is large relative to the sample size.

Step 3: Refined Causal Forests with Orthogonalization

We estimate causal forests in three specifications:

Specification A uses all selected covariates including cross-outcome baseline IPV variables
Specification B (policy-relevant) excludes cross-outcome baseline IPV variables that would not be available for targeting
Specification C (purely observable) excludes all baseline IPV variables entirely

Full-Data Estimation

set.seed(100241)

# --- Specification A: All selected covariates ---
cf <- causal_forest(X[, selected_phys], Y, W,
                    W.hat = W_hat, Y.hat = Y_hat_phys,
                    num.trees = 4000, tune.parameters = "all",
                    compute.oob.predictions = TRUE)
ate_all <- average_treatment_effect(cf, target.sample = "all")
att     <- average_treatment_effect(cf, target.sample = "treated")

cf_ctrl <- causal_forest(X[, selected_ctrl], Y_ctrl, W,
                          W.hat = W_hat, Y.hat = Y_hat_ctrl,
                          num.trees = 4000, tune.parameters = "all",
                          compute.oob.predictions = TRUE)
ate_ctrl <- average_treatment_effect(cf_ctrl, target.sample = "all")

cf_emot <- causal_forest(X[, selected_emot], Y_emot, W,
                          W.hat = W_hat, Y.hat = Y_hat_emot,
                          num.trees = 4000, tune.parameters = "all",
                          compute.oob.predictions = TRUE)
ate_emot <- average_treatment_effect(cf_emot, target.sample = "all")

# --- Specification B: Policy-relevant covariates ---
cf_pol <- causal_forest(X_pol_phys[, selected_phys_pol], Y, W,
                         W.hat = W_hat, Y.hat = Y_hat_phys_pol,
                         num.trees = 4000, tune.parameters = "all",
                         compute.oob.predictions = TRUE)
ate_all_pol <- average_treatment_effect(cf_pol, target.sample = "all")

cf_ctrl_pol <- causal_forest(X_pol_ctrl[, selected_ctrl_pol], Y_ctrl, W,
                              W.hat = W_hat, Y.hat = Y_hat_ctrl_pol,
                              num.trees = 4000, tune.parameters = "all",
                              compute.oob.predictions = TRUE)
ate_ctrl_pol <- average_treatment_effect(cf_ctrl_pol, target.sample = "all")

cf_emot_pol <- causal_forest(X_pol_emot[, selected_emot_pol], Y_emot, W,
                              W.hat = W_hat, Y.hat = Y_hat_emot_pol,
                              num.trees = 4000, tune.parameters = "all",
                              compute.oob.predictions = TRUE)
ate_emot_pol <- average_treatment_effect(cf_emot_pol, target.sample = "all")

# --- Specification C: Purely observable covariates ---
cf_phys_obs <- causal_forest(X_pol_obs[, selected_phys_obs], Y, W,
                              W.hat = W_hat, Y.hat = Y_hat_phys_obs,
                              num.trees = 4000, tune.parameters = "all",
                              compute.oob.predictions = TRUE)
ate_all_obs <- average_treatment_effect(cf_phys_obs, target.sample = "all")
att_obs     <- average_treatment_effect(cf_phys_obs, target.sample = "treated")

cf_ctrl_obs <- causal_forest(X_pol_obs[, selected_ctrl_obs], Y_ctrl, W,
                              W.hat = W_hat, Y.hat = Y_hat_ctrl_obs,
                              num.trees = 4000, tune.parameters = "all",
                              compute.oob.predictions = TRUE)
ate_ctrl_obs <- average_treatment_effect(cf_ctrl_obs, target.sample = "all")

cf_emot_obs <- causal_forest(X_pol_obs[, selected_emot_obs], Y_emot, W,
                              W.hat = W_hat, Y.hat = Y_hat_emot_obs,
                              num.trees = 4000, tune.parameters = "all",
                              compute.oob.predictions = TRUE)
ate_emot_obs <- average_treatment_effect(cf_emot_obs, target.sample = "all")

cat("=== Specification A (All Selected Covariates) ===\n")

## === Specification A (All Selected Covariates) ===

cat("Physical/Sexual:  ATE =", round(ate_all[1], 4), " SE =", round(ate_all[2], 4), "\n")

## Physical/Sexual:  ATE = -0.0577  SE = 0.0239

cat("                  ATT =", round(att[1], 4), " SE =", round(att[2], 4), "\n")

##                   ATT = -0.0587  SE = 0.0243

cat("Controlling:      ATE =", round(ate_ctrl[1], 4), " SE =", round(ate_ctrl[2], 4), "\n")

## Controlling:      ATE = -0.0697  SE = 0.0293

cat("Emotional:        ATE =", round(ate_emot[1], 4), " SE =", round(ate_emot[2], 4), "\n")

## Emotional:        ATE = -0.0464  SE = 0.0287

cat("\n=== Specification B (Policy-Relevant) ===\n")

## 
## === Specification B (Policy-Relevant) ===

cat("Physical/Sexual:  ATE =", round(ate_all_pol[1], 4), " SE =", round(ate_all_pol[2], 4), "\n")

## Physical/Sexual:  ATE = -0.0603  SE = 0.024

cat("Controlling:      ATE =", round(ate_ctrl_pol[1], 4), " SE =", round(ate_ctrl_pol[2], 4), "\n")

## Controlling:      ATE = -0.0621  SE = 0.0294

cat("Emotional:        ATE =", round(ate_emot_pol[1], 4), " SE =", round(ate_emot_pol[2], 4), "\n")

## Emotional:        ATE = -0.0485  SE = 0.029

cat("\n=== Specification C (Purely Observable) ===\n")

## 
## === Specification C (Purely Observable) ===

cat("Physical/Sexual:  ATE =", round(ate_all_obs[1], 4), " SE =", round(ate_all_obs[2], 4), "\n")

## Physical/Sexual:  ATE = -0.0487  SE = 0.0252

cat("                  ATT =", round(att_obs[1], 4), " SE =", round(att_obs[2], 4), "\n")

##                   ATT = -0.0492  SE = 0.0256

cat("Controlling:      ATE =", round(ate_ctrl_obs[1], 4), " SE =", round(ate_ctrl_obs[2], 4), "\n")

## Controlling:      ATE = -0.0694  SE = 0.0302

cat("Emotional:        ATE =", round(ate_emot_obs[1], 4), " SE =", round(ate_emot_obs[2], 4), "\n")

## Emotional:        ATE = -0.0419  SE = 0.0303

K-Fold Cross-Validated CATE Predictions

We generate out-of-fold CATE predictions using 5-fold CV. In each fold, we fit causal forests on the training folds and predict on the held-out fold, ensuring every observation has a truly out-of-sample CATE prediction for downstream evaluation.

set.seed(100241)

oof_cate_phys <- rep(NA, n)
oof_cate_ctrl <- rep(NA, n)
oof_cate_emot <- rep(NA, n)
oof_cate_phys_pol <- rep(NA, n)
oof_cate_ctrl_pol <- rep(NA, n)
oof_cate_emot_pol <- rep(NA, n)
oof_cate_phys_obs <- rep(NA, n)
oof_cate_ctrl_obs <- rep(NA, n)
oof_cate_emot_obs <- rep(NA, n)

for (k in 1:K) {
  val_idx   <- folds[[k]]
  train_idx <- setdiff(1:n, val_idx)
  
  X_tr <- X[train_idx, ]; X_va <- X[val_idx, ]
  Y_tr <- Y[train_idx]; W_tr <- W[train_idx]
  Y_ctrl_tr <- Y_ctrl[train_idx]; Y_emot_tr <- Y_emot[train_idx]
  
  # Use full-data OOB nuisance estimates (valid since OOB is cross-validated)
  W_hat_tr      <- W_hat[train_idx]
  Y_hat_phys_tr <- Y_hat_phys[train_idx]
  Y_hat_ctrl_tr <- Y_hat_ctrl[train_idx]
  Y_hat_emot_tr <- Y_hat_emot[train_idx]
  
  # Spec A causal forests
  cf_k <- causal_forest(X_tr[, selected_phys], Y_tr, W_tr,
                         W.hat = W_hat_tr, Y.hat = Y_hat_phys_tr,
                         num.trees = 2000, tune.parameters = "all")
  oof_cate_phys[val_idx] <- predict(cf_k, X_va[, selected_phys])$predictions
  
  cf_k_c <- causal_forest(X_tr[, selected_ctrl], Y_ctrl_tr, W_tr,
                            W.hat = W_hat_tr, Y.hat = Y_hat_ctrl_tr,
                            num.trees = 2000, tune.parameters = "all")
  oof_cate_ctrl[val_idx] <- predict(cf_k_c, X_va[, selected_ctrl])$predictions
  
  cf_k_e <- causal_forest(X_tr[, selected_emot], Y_emot_tr, W_tr,
                            W.hat = W_hat_tr, Y.hat = Y_hat_emot_tr,
                            num.trees = 2000, tune.parameters = "all")
  oof_cate_emot[val_idx] <- predict(cf_k_e, X_va[, selected_emot])$predictions
  
  # Spec B: policy-relevant
  X_pol_phys_tr <- X_tr[, cf_vars_policy_phys]; X_pol_phys_va <- X_va[, cf_vars_policy_phys]
  X_pol_ctrl_tr <- X_tr[, cf_vars_policy_ctrl]; X_pol_ctrl_va <- X_va[, cf_vars_policy_ctrl]
  X_pol_emot_tr <- X_tr[, cf_vars_policy_emot]; X_pol_emot_va <- X_va[, cf_vars_policy_emot]
  
  Y_hat_phys_pol_tr <- Y_hat_phys_pol[train_idx]
  Y_hat_ctrl_pol_tr <- Y_hat_ctrl_pol[train_idx]
  Y_hat_emot_pol_tr <- Y_hat_emot_pol[train_idx]
  
  cf_k_pp <- causal_forest(X_pol_phys_tr[, selected_phys_pol], Y_tr, W_tr,
                             W.hat = W_hat_tr, Y.hat = Y_hat_phys_pol_tr,
                             num.trees = 2000, tune.parameters = "all")
  oof_cate_phys_pol[val_idx] <- predict(cf_k_pp, X_pol_phys_va[, selected_phys_pol])$predictions
  
  cf_k_cp <- causal_forest(X_pol_ctrl_tr[, selected_ctrl_pol], Y_ctrl_tr, W_tr,
                             W.hat = W_hat_tr, Y.hat = Y_hat_ctrl_pol_tr,
                             num.trees = 2000, tune.parameters = "all")
  oof_cate_ctrl_pol[val_idx] <- predict(cf_k_cp, X_pol_ctrl_va[, selected_ctrl_pol])$predictions
  
  cf_k_ep <- causal_forest(X_pol_emot_tr[, selected_emot_pol], Y_emot_tr, W_tr,
                             W.hat = W_hat_tr, Y.hat = Y_hat_emot_pol_tr,
                             num.trees = 2000, tune.parameters = "all")
  oof_cate_emot_pol[val_idx] <- predict(cf_k_ep, X_pol_emot_va[, selected_emot_pol])$predictions
  
  # Spec C: purely observable covariates
  X_pol_obs_tr <- X_tr[, cf_vars_policy_obs]; X_pol_obs_va <- X_va[, cf_vars_policy_obs]
  
  Y_hat_phys_obs_tr <- Y_hat_phys_obs[train_idx]
  Y_hat_ctrl_obs_tr <- Y_hat_ctrl_obs[train_idx]
  Y_hat_emot_obs_tr <- Y_hat_emot_obs[train_idx]
  
  cf_k_po <- causal_forest(X_pol_obs_tr[, selected_phys_obs], Y_tr, W_tr,
                             W.hat = W_hat_tr, Y.hat = Y_hat_phys_obs_tr,
                             num.trees = 2000, tune.parameters = "all")
  oof_cate_phys_obs[val_idx] <- predict(cf_k_po, X_pol_obs_va[, selected_phys_obs])$predictions
  
  cf_k_co <- causal_forest(X_pol_obs_tr[, selected_ctrl_obs], Y_ctrl_tr, W_tr,
                             W.hat = W_hat_tr, Y.hat = Y_hat_ctrl_obs_tr,
                             num.trees = 2000, tune.parameters = "all")
  oof_cate_ctrl_obs[val_idx] <- predict(cf_k_co, X_pol_obs_va[, selected_ctrl_obs])$predictions
  
  cf_k_eo <- causal_forest(X_pol_obs_tr[, selected_emot_obs], Y_emot_tr, W_tr,
                             W.hat = W_hat_tr, Y.hat = Y_hat_emot_obs_tr,
                             num.trees = 2000, tune.parameters = "all")
  oof_cate_emot_obs[val_idx] <- predict(cf_k_eo, X_pol_obs_va[, selected_emot_obs])$predictions
  
  cat("Fold", k, "of", K, "complete\n")
}

## Fold 1 of 5 complete
## Fold 2 of 5 complete
## Fold 3 of 5 complete
## Fold 4 of 5 complete
## Fold 5 of 5 complete

cat("\nSpec A OOF CATE summary (Physical/Sexual):\n"); summary(oof_cate_phys)

## 
## Spec A OOF CATE summary (Physical/Sexual):

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.07316 -0.07059 -0.06403 -0.05586 -0.03794 -0.03361

cat("\nSpec B OOF CATE summary (Physical/Sexual):\n"); summary(oof_cate_phys_pol)

## 
## Spec B OOF CATE summary (Physical/Sexual):

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.08510 -0.07344 -0.07216 -0.05944 -0.03844 -0.02871

cat("\nSpec C OOF CATE summary (Physical/Sexual):\n"); summary(oof_cate_phys_obs)

## 
## Spec C OOF CATE summary (Physical/Sexual):

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -0.07133 -0.06273 -0.06011 -0.04890 -0.03542 -0.02107

Out-of-Fold CATE Distributions

cate_oof_df <- data.frame(
  CATE = c(oof_cate_phys, oof_cate_phys_pol, oof_cate_phys_obs),
  Specification = factor(rep(
    c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)"),
    each = n
  ), levels = c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)"))
)

cate_mean_df <- data.frame(
  Specification = factor(
    c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)"),
    levels = c("Spec A (All Selected)", "Spec B (Policy-Relevant)", "Spec C (Observable)")
  ),
  Mean = c(mean(oof_cate_phys), mean(oof_cate_phys_pol), mean(oof_cate_phys_obs))
)

ggplot(cate_oof_df, aes(x = CATE)) +
  geom_density(fill = "steelblue", alpha = 0.4) +
  geom_vline(xintercept = 0, color = "blue", linetype = "dotted") +
  geom_vline(data = cate_mean_df, aes(xintercept = Mean), color = "red", linetype = "dotted") +
  facet_wrap(~ Specification, scales = "free") +
  labs(title = "Out-of-Fold CATE Distributions (Physical/Sexual Violence)",
       subtitle = "Blue dotted = zero  |  Red dotted = mean CATE",
       x = "Treatment Effect", y = "Density") +
  theme_minimal()

Confidence Intervals for CATEs

# OOB predictions with variance estimates from the full-data forest
oob_ci <- predict(cf, estimate.variance = TRUE)
oob_cate_phys <- oob_ci$predictions
sigma_hat <- sqrt(oob_ci$variance.estimates)
ord <- order(oob_cate_phys)

ci_df <- data.frame(
  rank  = seq_along(ord),
  cate  = oob_cate_phys[ord],
  lower = (oob_cate_phys - 1.96 * sigma_hat)[ord],
  upper = (oob_cate_phys + 1.96 * sigma_hat)[ord]
)

ggplot(ci_df, aes(x = rank, y = cate)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.5) +
  geom_line(color = "black") +
  geom_hline(yintercept = 0, color = "blue", linetype = "dotted") +
  geom_hline(yintercept = mean(oob_cate_phys), color = "red", linetype = "dotted") +
  labs(title = "Individual CATEs with 95% CI (OOB, Full Data)",
       subtitle = "Blue dotted = zero  |  Red dotted = mean CATE",
       x = "Observation (sorted by CATE)", y = "Treatment Effect") +
  theme_minimal()

Part 4: Results, Interpretation & Validation

4.1 Comparison Table: OLS vs Causal Forest

m_phys <- lm(as.formula(paste("el_physical ~ p_treat + bl_physical +", controls_formula)), data = dfa)
m_ctrl_lpm <- lm(as.formula(paste("el_control ~ p_treat + bl_control +", controls_formula)), data = dfa)
m_emot_lpm <- lm(as.formula(paste("el_emotional ~ p_treat + bl_emotional +", controls_formula)), data = dfa)

se_phys <- sqrt(diag(vcovCL(m_phys, cluster = dfa$bl_a05)))["p_treat"]
se_ctrl_lpm <- sqrt(diag(vcovCL(m_ctrl_lpm, cluster = dfa$bl_a05)))["p_treat"]
se_emot_lpm <- sqrt(diag(vcovCL(m_emot_lpm, cluster = dfa$bl_a05)))["p_treat"]

comparison_table <- data.frame(
  Outcome = c("Physical/Sexual Violence", "Controlling Behaviors", "Emotional Violence"),
  OLS_ATE = round(c(coef(m_phys)["p_treat"], coef(m_ctrl_lpm)["p_treat"], coef(m_emot_lpm)["p_treat"]), 4),
  OLS_SE = round(c(se_phys, se_ctrl_lpm, se_emot_lpm), 4),
  CF_ATE = round(c(ate_all[1], ate_ctrl[1], ate_emot[1]), 4),
  CF_SE = round(c(ate_all[2], ate_ctrl[2], ate_emot[2]), 4),
  Control_Mean = round(c(
    mean(dfa$el_physical[dfa$p_treat == 0], na.rm = TRUE),
    mean(dfa$el_control[dfa$p_treat == 0], na.rm = TRUE),
    mean(dfa$el_emotional[dfa$p_treat == 0], na.rm = TRUE)
  ), 3)
)

comparison_table$OLS_pct_reduction <- round(comparison_table$OLS_ATE / comparison_table$Control_Mean * 100, 1)
comparison_table$CF_pct_reduction  <- round(comparison_table$CF_ATE / comparison_table$Control_Mean * 100, 1)

knitr::kable(comparison_table, 
             col.names = c("Outcome", "OLS ATE", "OLS SE", "CF ATE", "CF SE", 
                           "Control Mean", "OLS % Reduction", "CF % Reduction"))

Outcome	OLS ATE	OLS SE	CF ATE	CF SE	Control Mean	OLS % Reduction	CF % Reduction
Physical/Sexual Violence	-0.0601	0.0333	-0.0577	0.0239	0.197	-30.5	-29.3
Controlling Behaviors	-0.0679	0.0335	-0.0697	0.0293	0.362	-18.8	-19.3
Emotional Violence	-0.0398	0.0344	-0.0464	0.0287	0.351	-11.3	-13.2

# Specification A vs B vs C comparison
spec_comparison <- data.frame(
  Outcome = c("Physical/Sexual Violence", "Controlling Behaviors", "Emotional Violence"),
  Spec_A_ATE = round(c(ate_all[1], ate_ctrl[1], ate_emot[1]), 4),
  Spec_A_SE = round(c(ate_all[2], ate_ctrl[2], ate_emot[2]), 4),
  Spec_B_ATE = round(c(ate_all_pol[1], ate_ctrl_pol[1], ate_emot_pol[1]), 4),
  Spec_B_SE = round(c(ate_all_pol[2], ate_ctrl_pol[2], ate_emot_pol[2]), 4),
  Spec_C_ATE = round(c(ate_all_obs[1], ate_ctrl_obs[1], ate_emot_obs[1]), 4),
  Spec_C_SE = round(c(ate_all_obs[2], ate_ctrl_obs[2], ate_emot_obs[2]), 4)
)

knitr::kable(spec_comparison, 
             col.names = c("Outcome", "Spec A ATE", "Spec A SE", "Spec B ATE", "Spec B SE",
                           "Spec C ATE", "Spec C SE"),
             caption = "Specification A vs B vs C: Effect of Covariate Restrictions")

Specification A vs B vs C: Effect of Covariate Restrictions
Outcome	Spec A ATE	Spec A SE	Spec B ATE	Spec B SE	Spec C ATE	Spec C SE
Physical/Sexual Violence	-0.0577	0.0239	-0.0603	0.0240	-0.0487	0.0252
Controlling Behaviors	-0.0697	0.0293	-0.0621	0.0294	-0.0694	0.0302
Emotional Violence	-0.0464	0.0287	-0.0485	0.0290	-0.0419	0.0303

Both the OLS and the Causal Forest should agree on the direction and rough magnitude of the treatment effects across all three outcomes, since this is an RCT and both methods should recover the same average effect under randomization. The causal forest now uses the Athey/Wager two-step pipeline (propensity orthogonalization + outcome-based variable selection), which should improve efficiency. Compare the CF ATE estimates with the OLS estimates — they should be close, which confirms the forest is well-calibrated at the average level.

4.2 Best Linear Projection (Heterogeneity Drivers)

cat("=== Specification A ===")

## === Specification A ===

cat("\n--- Best Linear Projection: Physical/Sexual Violence ---\n")

## 
## --- Best Linear Projection: Physical/Sexual Violence ---

blp_phys <- best_linear_projection(cf, X[, selected_phys])
print(blp_phys)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.1000044  0.1235572 -0.8094  0.41846  
## bl_age           -0.0017336  0.0043375 -0.3997  0.68946  
## bl_married        0.0918910  0.0512166  1.7942  0.07304 .
## bl_partn_age      0.0011980  0.0043217  0.2772  0.78168  
## bl_nchildren0t5   0.0208956  0.0437058  0.4781  0.63267  
## bl_nchildren6t14 -0.0178244  0.0239639 -0.7438  0.45714  
## bl_asset_index    0.0086730  0.0152429  0.5690  0.56947  
## norooms           0.0841146  0.0850259  0.9893  0.32272  
## bl_physical       0.0792716  0.1020392  0.7769  0.43738  
## bl_emotional     -0.0148248  0.0708142 -0.2093  0.83421  
## bl_control       -0.0402676  0.0964423 -0.4175  0.67636  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Best Linear Projection: Controlling Behaviors ---\n")

## 
## --- Best Linear Projection: Controlling Behaviors ---

blp_ctrl <- best_linear_projection(cf_ctrl, X[, selected_ctrl])
print(blp_ctrl)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                     Estimate  Std. Error t value Pr(>|t|)  
## (Intercept)      -0.13218713  0.12621680 -1.0473  0.29517  
## bl_age            0.00089949  0.00498699  0.1804  0.85689  
## bl_married        0.00987188  0.06289379  0.1570  0.87530  
## bl_partn_age     -0.00196940  0.00469768 -0.4192  0.67512  
## bl_nchildren0t5   0.08409471  0.04500581  1.8685  0.06193 .
## bl_nchildren6t14  0.03521191  0.02843995  1.2381  0.21591  
## bl_asset_index    0.01008179  0.01635709  0.6164  0.53778  
## bl_physical       0.16884871  0.10461778  1.6140  0.10680  
## bl_emotional     -0.09419647  0.09009391 -1.0455  0.29598  
## bl_control        0.00584518  0.10249370  0.0570  0.95453  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Best Linear Projection: Emotional Violence ---\n")

## 
## --- Best Linear Projection: Emotional Violence ---

blp_emot <- best_linear_projection(cf_emot, X[, selected_emot])
print(blp_emot)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.1465476  0.1236409 -1.1853  0.23614  
## bl_age           -0.0028275  0.0051103 -0.5533  0.58016  
## bl_married        0.0739129  0.0650190  1.1368  0.25585  
## bl_partn_age      0.0031890  0.0048066  0.6635  0.50717  
## bl_nchildren0t5   0.0856955  0.0443821  1.9309  0.05373 .
## bl_nchildren6t14 -0.0162518  0.0290938 -0.5586  0.57654  
## bl_asset_index    0.0167930  0.0165620  1.0140  0.31081  
## bl_carchi        -0.0508362  0.0624856 -0.8136  0.41605  
## bl_physical       0.0893699  0.1054841  0.8472  0.39703  
## bl_emotional     -0.0528201  0.0910615 -0.5800  0.56199  
## bl_control        0.0380223  0.1014398  0.3748  0.70786  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n=== Specification B (Policy-Relevant) ===")

## 
## === Specification B (Policy-Relevant) ===

cat("\n--- Best Linear Projection: Physical/Sexual Violence ---\n")

## 
## --- Best Linear Projection: Physical/Sexual Violence ---

blp_phys_pol <- best_linear_projection(cf_pol, X_pol_phys[, selected_phys_pol])
print(blp_phys_pol)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                     Estimate  Std. Error t value Pr(>|t|)  
## (Intercept)      -0.10841743  0.12496562 -0.8676  0.38580  
## bl_age           -0.00086593  0.00434239 -0.1994  0.84197  
## bl_married        0.09901641  0.05113983  1.9362  0.05308 .
## bl_partn_age      0.00035772  0.00438609  0.0816  0.93501  
## bl_nchildren0t5   0.02229131  0.04473826  0.4983  0.61839  
## bl_nchildren6t14 -0.02094168  0.02388605 -0.8767  0.38080  
## bl_asset_index    0.00976418  0.01553441  0.6286  0.52976  
## norooms           0.08539786  0.08636388  0.9888  0.32295  
## bl_physical       0.05196461  0.08246506  0.6301  0.52872  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Best Linear Projection: Controlling Behaviors ---\n")

## 
## --- Best Linear Projection: Controlling Behaviors ---

blp_ctrl_pol <- best_linear_projection(cf_ctrl_pol, X_pol_ctrl[, selected_ctrl_pol])
print(blp_ctrl_pol)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.2352333  0.1664114 -1.4136  0.15775  
## bl_colom          0.0165207  0.0707558  0.2335  0.81542  
## bl_age            0.0019622  0.0051679  0.3797  0.70424  
## bl_edu_sec        0.1061002  0.0710683  1.4929  0.13572  
## bl_married        0.0150949  0.0686614  0.2198  0.82603  
## bl_dempl         -0.0671536  0.0644579 -1.0418  0.29770  
## bl_partn_age     -0.0016009  0.0047794 -0.3350  0.73771  
## bl_nchildren0t5   0.1010507  0.0466592  2.1657  0.03053 *
## bl_nchildren6t14  0.0457307  0.0295246  1.5489  0.12167  
## bl_asset_index    0.0048592  0.0192305  0.2527  0.80056  
## norooms           0.0528069  0.1011868  0.5219  0.60185  
## bl_carchi         0.0104220  0.0660645  0.1578  0.87468  
## bl_control        0.0246849  0.0864850  0.2854  0.77537  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Best Linear Projection: Emotional Violence ---\n")

## 
## --- Best Linear Projection: Emotional Violence ---

blp_emot_pol <- best_linear_projection(cf_emot_pol, X_pol_emot[, selected_emot_pol])
print(blp_emot_pol)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.1472679  0.1249143 -1.1790  0.23865  
## bl_age           -0.0022061  0.0050892 -0.4335  0.66475  
## bl_married        0.0631691  0.0649907  0.9720  0.33126  
## bl_partn_age      0.0029449  0.0047976  0.6138  0.53944  
## bl_nchildren0t5   0.0830642  0.0444881  1.8671  0.06213 .
## bl_nchildren6t14 -0.0139069  0.0288333 -0.4823  0.62967  
## bl_asset_index    0.0165902  0.0168101  0.9869  0.32388  
## bl_carchi        -0.0537107  0.0628015 -0.8552  0.39258  
## bl_emotional     -0.0034706  0.0704287 -0.0493  0.96071  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n=== Specification C (Purely Observable) ===")

## 
## === Specification C (Purely Observable) ===

cat("\n--- Best Linear Projection: Physical/Sexual Violence ---\n")

## 
## --- Best Linear Projection: Physical/Sexual Violence ---

blp_phys_obs <- best_linear_projection(cf_phys_obs, X_pol_obs[, selected_phys_obs])
print(blp_phys_obs)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                     Estimate  Std. Error t value Pr(>|t|)
## (Intercept)      -0.18685174  0.14521247 -1.2867   0.1984
## bl_colom         -0.00023180  0.06074368 -0.0038   0.9970
## bl_age            0.00020569  0.00442750  0.0465   0.9630
## bl_married        0.07179208  0.05683713  1.2631   0.2068
## bl_black          0.04538864  0.10031477  0.4525   0.6510
## bl_dempl         -0.04798959  0.05710993 -0.8403   0.4009
## bl_partn_sedu     0.09204372  0.05626684  1.6358   0.1021
## bl_partn_age      0.00127089  0.00426719  0.2978   0.7659
## bl_nchildren0t5   0.03654385  0.04621558  0.7907   0.4293
## bl_nchildren6t14 -0.01403080  0.02558415 -0.5484   0.5835
## bl_asset_index    0.00124473  0.01665725  0.0747   0.9404
## norooms           0.09409778  0.08851783  1.0630   0.2880

cat("\n--- Best Linear Projection: Controlling Behaviors ---\n")

## 
## --- Best Linear Projection: Controlling Behaviors ---

blp_ctrl_obs <- best_linear_projection(cf_ctrl_obs, X_pol_obs[, selected_ctrl_obs])
print(blp_ctrl_obs)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                    Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)      -0.2328186  0.1751592 -1.3292 0.1840385    
## bl_head           0.0739068  0.2507252  0.2948 0.7682186    
## bl_colom          0.0138856  0.0738598  0.1880 0.8509087    
## bl_age            0.0057809  0.0054005  1.0705 0.2846302    
## bl_edu_sec        0.1224426  0.0798524  1.5334 0.1254487    
## bl_married        0.0014817  0.0709355  0.0209 0.9833383    
## bl_indig         -0.4989072  0.1507094 -3.3104 0.0009591 ***
## bl_black          0.1127024  0.1173344  0.9605 0.3369840    
## bl_dempl         -0.0694329  0.0663394 -1.0466 0.2954790    
## ownhouse         -0.0213614  0.1394939 -0.1531 0.8783172    
## bl_partn_sedu    -0.0247578  0.0770955 -0.3211 0.7481664    
## bl_partn_age     -0.0048983  0.0051668 -0.9480 0.3432972    
## bl_nchildren0t5   0.1107183  0.0491776  2.2514 0.0245394 *  
## bl_nchildren6t14  0.0453585  0.0302079  1.5015 0.1334759    
## bl_asset_index    0.0024416  0.0203893  0.1197 0.9047035    
## norooms           0.0783139  0.1030734  0.7598 0.4475297    
## bl_carchi         0.0153411  0.0697435  0.2200 0.8259357    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n--- Best Linear Projection: Emotional Violence ---\n")

## 
## --- Best Linear Projection: Emotional Violence ---

blp_emot_obs <- best_linear_projection(cf_emot_obs, X_pol_obs[, selected_emot_obs])
print(blp_emot_obs)

## 
## Best linear projection of the conditional average treatment effect.
## Confidence intervals are cluster- and heteroskedasticity-robust (HC3):
## 
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)      -0.3016051  0.1762209 -1.7115  0.08724 .
## bl_colom          0.0322986  0.0730214  0.4423  0.65834  
## bl_age            0.0015419  0.0056732  0.2718  0.78584  
## bl_edu_sec        0.0999851  0.0796701  1.2550  0.20972  
## bl_married        0.0680858  0.0708152  0.9615  0.33651  
## bl_dempl         -0.0811585  0.0666257 -1.2181  0.22341  
## bl_partn_sedu     0.0540369  0.0777410  0.6951  0.48713  
## bl_partn_age      0.0014708  0.0053866  0.2730  0.78486  
## bl_nchildren0t5   0.1059694  0.0497525  2.1299  0.03338 *
## bl_nchildren6t14 -0.0075796  0.0309947 -0.2445  0.80685  
## bl_asset_index    0.0110714  0.0203927  0.5429  0.58729  
## norooms           0.0781911  0.1042103  0.7503  0.45321  
## bl_carchi        -0.0326712  0.0693800 -0.4709  0.63780  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The BLP results help us understand which covariates are associated with treatment effect heterogeneity — basically for whom the program works better or worse. Note that the BLP now only includes the selected covariates from the outcome regression forest pre-selection step, so the set of variables examined may differ across outcomes.

4.3 CATE Quartile Analysis

quartile_analysis <- function(Y_vec, W_vec, cate_preds, outcome_name) {
  q25 <- quantile(cate_preds, 0.25)
  q75 <- quantile(cate_preds, 0.75)
  
  ate_low <- mean(Y_vec[W_vec == 1 & cate_preds <= q25]) - 
             mean(Y_vec[W_vec == 0 & cate_preds <= q25])
  ate_mid <- mean(Y_vec[W_vec == 1 & cate_preds > q25 & cate_preds <= q75]) - 
             mean(Y_vec[W_vec == 0 & cate_preds > q25 & cate_preds <= q75])
  ate_high <- mean(Y_vec[W_vec == 1 & cate_preds > q75]) - 
              mean(Y_vec[W_vec == 0 & cate_preds > q75])
  
  data.frame(Outcome = outcome_name,
             Q1_lowest = round(ate_low, 4),
             Q2_Q3_middle = round(ate_mid, 4),
             Q4_highest = round(ate_high, 4),
             Spread = round(ate_high - ate_low, 4))
}

# Spec A: using out-of-fold predictions from k-fold CV
q_table <- rbind(
  quartile_analysis(Y, W, oof_cate_phys, "Physical/Sexual"),
  quartile_analysis(Y_ctrl, W, oof_cate_ctrl, "Controlling"),
  quartile_analysis(Y_emot, W, oof_cate_emot, "Emotional")
)
knitr::kable(q_table, caption = "Spec A: Realized ATEs by Predicted CATE Quartile (K-Fold OOF)")

Spec A: Realized ATEs by Predicted CATE Quartile (K-Fold OOF)
Outcome	Q1_lowest	Q2_Q3_middle	Q4_highest	Spread
Physical/Sexual	0.0141	-0.0400	-0.1521	-0.1662
Controlling	-0.0389	-0.0274	-0.1875	-0.1486
Emotional	0.0065	-0.0741	0.0345	0.0280

# Spec B: policy-relevant
q_table_pol <- rbind(
  quartile_analysis(Y, W, oof_cate_phys_pol, "Physical/Sexual"),
  quartile_analysis(Y_ctrl, W, oof_cate_ctrl_pol, "Controlling"),
  quartile_analysis(Y_emot, W, oof_cate_emot_pol, "Emotional")
)
knitr::kable(q_table_pol, caption = "Spec B (Policy): Realized ATEs by Predicted CATE Quartile (K-Fold OOF)")

Spec B (Policy): Realized ATEs by Predicted CATE Quartile (K-Fold OOF)
Outcome	Q1_lowest	Q2_Q3_middle	Q4_highest	Spread
Physical/Sexual	0.0141	-0.0324	-0.1521	-0.1662
Controlling	-0.0461	-0.0770	-0.1111	-0.0650
Emotional	-0.0263	-0.0410	-0.0539	-0.0276

# Spec C: purely observable
q_table_obs <- rbind(
  quartile_analysis(Y, W, oof_cate_phys_obs, "Physical/Sexual"),
  quartile_analysis(Y_ctrl, W, oof_cate_ctrl_obs, "Controlling"),
  quartile_analysis(Y_emot, W, oof_cate_emot_obs, "Emotional")
)
knitr::kable(q_table_obs, caption = "Spec C (Observable): Realized ATEs by Predicted CATE Quartile (K-Fold OOF)")

Spec C (Observable): Realized ATEs by Predicted CATE Quartile (K-Fold OOF)
Outcome	Q1_lowest	Q2_Q3_middle	Q4_highest	Spread
Physical/Sexual	0.0246	-0.0412	-0.1609	-0.1855
Controlling	-0.0379	-0.0757	-0.1541	-0.1162
Emotional	-0.0195	-0.0742	0.0075	0.0270

# CATE heterogeneity summary across all three specifications
cate_summary <- data.frame(
  Specification = rep(c("A (Full)", "B (Policy)", "C (Observable)"), each = 3),
  Outcome = rep(c("Physical/Sexual", "Controlling", "Emotional"), 3),
  Mean = round(c(mean(oof_cate_phys), mean(oof_cate_ctrl), mean(oof_cate_emot),
                  mean(oof_cate_phys_pol), mean(oof_cate_ctrl_pol), mean(oof_cate_emot_pol),
                  mean(oof_cate_phys_obs), mean(oof_cate_ctrl_obs), mean(oof_cate_emot_obs)), 4),
  SD = round(c(sd(oof_cate_phys), sd(oof_cate_ctrl), sd(oof_cate_emot),
                sd(oof_cate_phys_pol), sd(oof_cate_ctrl_pol), sd(oof_cate_emot_pol),
                sd(oof_cate_phys_obs), sd(oof_cate_ctrl_obs), sd(oof_cate_emot_obs)), 4),
  Min = round(c(min(oof_cate_phys), min(oof_cate_ctrl), min(oof_cate_emot),
                 min(oof_cate_phys_pol), min(oof_cate_ctrl_pol), min(oof_cate_emot_pol),
                 min(oof_cate_phys_obs), min(oof_cate_ctrl_obs), min(oof_cate_emot_obs)), 4),
  Max = round(c(max(oof_cate_phys), max(oof_cate_ctrl), max(oof_cate_emot),
                 max(oof_cate_phys_pol), max(oof_cate_ctrl_pol), max(oof_cate_emot_pol),
                 max(oof_cate_phys_obs), max(oof_cate_ctrl_obs), max(oof_cate_emot_obs)), 4),
  IQR = round(c(IQR(oof_cate_phys), IQR(oof_cate_ctrl), IQR(oof_cate_emot),
                 IQR(oof_cate_phys_pol), IQR(oof_cate_ctrl_pol), IQR(oof_cate_emot_pol),
                 IQR(oof_cate_phys_obs), IQR(oof_cate_ctrl_obs), IQR(oof_cate_emot_obs)), 4)
)

knitr::kable(cate_summary, caption = "CATE Distribution Summary Across Specifications (K-Fold OOF)")

CATE Distribution Summary Across Specifications (K-Fold OOF)
Specification	Outcome	Mean	SD	Min	Max	IQR
A (Full)	Physical/Sexual	-0.0559	0.0167	-0.0732	-0.0336	0.0326
A (Full)	Controlling	-0.0710	0.0211	-0.1083	-0.0098	0.0408
A (Full)	Emotional	-0.0464	0.0225	-0.1089	0.0705	0.0280
B (Policy)	Physical/Sexual	-0.0594	0.0218	-0.0851	-0.0287	0.0350
B (Policy)	Controlling	-0.0640	0.0243	-0.1691	0.0217	0.0416
B (Policy)	Emotional	-0.0448	0.0159	-0.0870	0.0275	0.0163
C (Observable)	Physical/Sexual	-0.0489	0.0175	-0.0713	-0.0211	0.0273
C (Observable)	Controlling	-0.0723	0.0152	-0.0898	0.0036	0.0117
C (Observable)	Emotional	-0.0422	0.0287	-0.1439	0.0501	0.0311

cf_data_plot <- cf_data
cf_data_plot$cate_phys <- oof_cate_phys
cf_data_plot$cate_group <- cut(oof_cate_phys, 
  breaks = quantile(oof_cate_phys, c(0, 0.25, 0.75, 1)),
  labels = c("Most helped", "Middle", "Least helped"),
  include.lowest = TRUE)

profile <- cf_data_plot %>%
  group_by(cate_group) %>%
  summarise(
    n = n(),
    age = round(mean(bl_age, na.rm = TRUE), 1),
    pct_secondary_edu = round(mean(bl_edu_sec, na.rm = TRUE) * 100, 1),
    pct_colombian = round(mean(bl_colom, na.rm = TRUE) * 100, 1),
    pct_indigenous = round(mean(bl_indig, na.rm = TRUE) * 100, 1),
    pct_married = round(mean(bl_married, na.rm = TRUE) * 100, 1),
    pct_baseline_violence = round(mean(bl_physical, na.rm = TRUE) * 100, 1),
    mean_asset_index = round(mean(bl_asset_index, na.rm = TRUE), 3),
    .groups = "drop"
  )

knitr::kable(profile, caption = "Profile of Women by CATE Group (Physical/Sexual Violence)")

Profile of Women by CATE Group (Physical/Sexual Violence)
cate_group	n	age	pct_secondary_edu	pct_colombian	pct_indigenous	pct_married	pct_baseline_violence	mean_asset_index
Most helped	307	36.2	38.8	34.2	3.6	44.0	14.7	0.883
Middle	674	34.1	40.7	34.1	4.5	40.5	16.6	0.270
Least helped	245	35.2	32.2	38.0	3.7	45.3	16.7	0.331

The quartile table tells us whether the causal forest is picking up meaningful heterogeneity or just noise. Look at the spread between the top and bottom quartile — a larger spread suggests meaningful heterogeneity detection. With k-fold CV, every observation has a truly out-of-sample CATE prediction, avoiding overfitting concerns. Compare Spec A and Spec B: if the quartile spread shrinks substantially in Spec B, the cross-outcome baseline IPV variables were important heterogeneity drivers (but not actionable for policy).

The profile table helps us see who falls into each CATE group. This is useful for understanding the practical implications of the heterogeneity — if we were to target the intervention, what kind of women should we prioritize?

4.4 Validation

Omnibus Test for Heterogeneity

cat("=== Specification A ===\n")

## === Specification A ===

test_cal <- test_calibration(cf)
print(test_cal)

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                  Estimate Std. Error  t value    Pr(>t)    
## mean.forest.prediction            0.99996    0.23897   4.1845 1.531e-05 ***
## differential.forest.prediction -419.90640   12.83985 -32.7034         1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n=== Specification B (Policy-Relevant) ===\n")

## 
## === Specification B (Policy-Relevant) ===

test_cal_pol <- test_calibration(cf_pol)
print(test_cal_pol)

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                  Estimate Std. Error  t value    Pr(>t)    
## mean.forest.prediction            1.07580    0.22989   4.6796 1.597e-06 ***
## differential.forest.prediction -460.88442   14.01695 -32.8805         1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\n=== Specification C (Purely Observable) ===\n")

## 
## === Specification C (Purely Observable) ===

test_cal_obs <- test_calibration(cf_phys_obs)
print(test_cal_obs)

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                  Estimate Std. Error  t value  Pr(>t)  
## mean.forest.prediction            0.60326    0.30567   1.9736 0.02433 *
## differential.forest.prediction -385.32525   14.41575 -26.7295 1.00000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The calibration test is key because it tells us whether the causal forest is detecting real heterogeneity or just overfitting to noise. We look at two things: the mean.forest.prediction coefficient tells us if the average prediction is well-calibrated (we want it close to 1 and significant), and the differential.forest.prediction tells us if there is actual heterogeneity. With the refined pipeline (orthogonalized W.hat and Y.hat, selected covariates), the calibration test may show improved results compared to the baseline approach. Examine the p-values in the output above.

Variable Importance

vi_phys <- variable_importance(cf)
vi_ctrl <- variable_importance(cf_ctrl)
vi_emot <- variable_importance(cf_emot)

# Diagnostic: check whether the causal forests produced any splits
# If tune.parameters = "all" detects no heterogeneity, it may set min.node.size
# so high that zero splits occur, resulting in zero variable importance.
cat("=== Causal Forest Split Diagnostics (Spec A) ===\n")

## === Causal Forest Split Diagnostics (Spec A) ===

cat("Physical — total importance:", sum(vi_phys),
    "| max importance:", max(vi_phys),
    "| non-zero vars:", sum(vi_phys > 0), "of", length(vi_phys), "\n")

## Physical — total importance: 0 | max importance: 0 | non-zero vars: 0 of 10

cat("Controlling — total importance:", sum(vi_ctrl),
    "| max importance:", max(vi_ctrl),
    "| non-zero vars:", sum(vi_ctrl > 0), "of", length(vi_ctrl), "\n")

## Controlling — total importance: 1 | max importance: 0.2757637 | non-zero vars: 9 of 9

cat("Emotional — total importance:", sum(vi_emot),
    "| max importance:", max(vi_emot),
    "| non-zero vars:", sum(vi_emot > 0), "of", length(vi_emot), "\n")

## Emotional — total importance: 1 | max importance: 0.1847139 | non-zero vars: 10 of 10

if (sum(vi_phys) == 0) {
  cat("\nNOTE: The Physical/Sexual violence causal forest has zero variable importance.\n")
  cat("This means the forest detected no significant treatment effect heterogeneity\n")
  cat("and tuned itself to not split. This is a genuine result, not a code error.\n")
  cat("The forest effectively estimates a constant treatment effect for this outcome.\n")
}

## 
## NOTE: The Physical/Sexual violence causal forest has zero variable importance.
## This means the forest detected no significant treatment effect heterogeneity
## and tuned itself to not split. This is a genuine result, not a code error.
## The forest effectively estimates a constant treatment effect for this outcome.

# Also compute unweighted split frequencies as an alternative diagnostic
# decay.exponent = 0 weights all splits equally regardless of depth
vi_phys_unw <- variable_importance(cf, decay.exponent = 0, max.depth = 4)
cat("\nPhysical unweighted importance (decay.exponent=0):\n")

## 
## Physical unweighted importance (decay.exponent=0):

print(round(as.numeric(vi_phys_unw), 6))

##  [1] 0 0 0 0 0 0 0 0 0 0

# Each forest uses different selected variables — create separate importance data frames
vi_phys_df <- data.frame(variable = names(X)[selected_phys], 
                          Importance = round(as.numeric(vi_phys), 6))
vi_ctrl_df <- data.frame(variable = names(X)[selected_ctrl], 
                          Importance = round(as.numeric(vi_ctrl), 6))
vi_emot_df <- data.frame(variable = names(X)[selected_emot], 
                          Importance = round(as.numeric(vi_emot), 6))

# Wide format summary table
vi_wide <- vi_phys_df %>% 
  rename(Physical = Importance) %>%
  full_join(vi_ctrl_df %>% rename(Controlling = Importance), by = "variable") %>%
  full_join(vi_emot_df %>% rename(Emotional = Importance), by = "variable") %>%
  arrange(desc(Physical))

knitr::kable(vi_wide, caption = "Causal Forest Variable Importance (Selected Variables Only)")

Causal Forest Variable Importance (Selected Variables Only)
variable	Physical	Controlling	Emotional
bl_age	0	0.216908	0.165550
bl_married	0	0.036659	0.059608
bl_partn_age	0	0.189924	0.172988
bl_nchildren0t5	0	0.160500	0.121671
bl_nchildren6t14	0	0.087424	0.106971
bl_asset_index	0	0.275764	0.184714
norooms	0	NA	NA
bl_physical	0	0.002715	0.034661
bl_emotional	0	0.022640	0.051915
bl_control	0	0.007466	0.038109
bl_carchi	NA	NA	0.063814

ggplot(vi_phys_df %>% arrange(desc(Importance)) %>% head(10), 
       aes(x = reorder(variable, Importance), y = Importance)) +
  geom_col(fill = "steelblue", alpha = 0.8) +
  coord_flip() +
  labs(title = "Top Variables Driving Heterogeneity (Physical/Sexual Violence)",
       subtitle = "Specification A",
       x = "", y = "Importance") +
  theme_minimal()

# Specification C: causal forest variable importance
vi_phys_obs_cf <- variable_importance(cf_phys_obs)
vi_ctrl_obs_cf <- variable_importance(cf_ctrl_obs)
vi_emot_obs_cf <- variable_importance(cf_emot_obs)

cat("\n=== Causal Forest Split Diagnostics (Spec C) ===\n")

## 
## === Causal Forest Split Diagnostics (Spec C) ===

cat("Physical — total importance:", sum(vi_phys_obs_cf),
    "| max importance:", max(vi_phys_obs_cf),
    "| non-zero vars:", sum(vi_phys_obs_cf > 0), "of", length(vi_phys_obs_cf), "\n")

## Physical — total importance: 0 | max importance: 0 | non-zero vars: 0 of 11

cat("Controlling — total importance:", sum(vi_ctrl_obs_cf),
    "| max importance:", max(vi_ctrl_obs_cf),
    "| non-zero vars:", sum(vi_ctrl_obs_cf > 0), "of", length(vi_ctrl_obs_cf), "\n")

## Controlling — total importance: 0.9560976 | max importance: 0.1682071 | non-zero vars: 12 of 16

cat("Emotional — total importance:", sum(vi_emot_obs_cf),
    "| max importance:", max(vi_emot_obs_cf),
    "| non-zero vars:", sum(vi_emot_obs_cf > 0), "of", length(vi_emot_obs_cf), "\n")

## Emotional — total importance: 1 | max importance: 0.1648512 | non-zero vars: 12 of 12

if (sum(vi_phys_obs_cf) == 0) {
  cat("\nNOTE: Spec C Physical/Sexual violence causal forest also has zero importance.\n")
  cat("Confirms no detectable heterogeneity for this outcome with observable covariates.\n")
}

## 
## NOTE: Spec C Physical/Sexual violence causal forest also has zero importance.
## Confirms no detectable heterogeneity for this outcome with observable covariates.

vi_phys_obs_cf_df <- data.frame(variable = names(X_pol_obs)[selected_phys_obs], 
                                 Importance = round(as.numeric(vi_phys_obs_cf), 6))
vi_ctrl_obs_cf_df <- data.frame(variable = names(X_pol_obs)[selected_ctrl_obs], 
                                 Importance = round(as.numeric(vi_ctrl_obs_cf), 6))
vi_emot_obs_cf_df <- data.frame(variable = names(X_pol_obs)[selected_emot_obs], 
                                 Importance = round(as.numeric(vi_emot_obs_cf), 6))

vi_wide_obs <- vi_phys_obs_cf_df %>% 
  rename(Physical = Importance) %>%
  full_join(vi_ctrl_obs_cf_df %>% rename(Controlling = Importance), by = "variable") %>%
  full_join(vi_emot_obs_cf_df %>% rename(Emotional = Importance), by = "variable") %>%
  arrange(desc(Physical))

knitr::kable(vi_wide_obs, caption = "Causal Forest Variable Importance — Specification C (Purely Observable)")

Causal Forest Variable Importance — Specification C (Purely Observable)
variable	Physical	Controlling	Emotional
bl_colom	0	0.053153	0.046467
bl_age	0	0.162344	0.141687
bl_married	0	0.052089	0.045651
bl_black	0	0.000000	NA
bl_dempl	0	0.046696	0.055330
bl_partn_sedu	0	0.050825	0.063719
bl_partn_age	0	0.159405	0.153237
bl_nchildren0t5	0	0.067308	0.105380
bl_nchildren6t14	0	0.078727	0.083660
bl_asset_index	0	0.168207	0.164851
norooms	0	0.000540	0.021985
bl_head	NA	0.000000	NA
bl_edu_sec	NA	0.062601	0.066964
bl_indig	NA	0.000000	NA
ownhouse	NA	0.000000	NA
bl_carchi	NA	0.054201	0.051068

ggplot(vi_phys_obs_cf_df %>% arrange(desc(Importance)) %>% head(10), 
       aes(x = reorder(variable, Importance), y = Importance)) +
  geom_col(fill = "darkgreen", alpha = 0.8) +
  coord_flip() +
  labs(title = "Top Variables Driving Heterogeneity (Physical/Sexual Violence)",
       subtitle = "Specification C (Purely Observable)",
       x = "", y = "Importance") +
  theme_minimal()

The variable importance from causal forests measures which covariates drive treatment effect heterogeneity — this is conceptually different from the outcome regression forest importance in Step 2 (which measures covariates that predict IPV levels). A variable can be a strong predictor of outcomes without driving heterogeneous treatment effects. If the causal forest importance is zero for physical/sexual violence, it means the forest detected no significant heterogeneity in the treatment effect for that outcome and tuned itself to produce essentially no splits. This is a genuine finding — the treatment effect on physical violence is approximately constant across the covariate space — and is consistent with the calibration test results and the original paper’s finding of a small, borderline-significant effect on physical violence.

Part 5: Robustness Checks

5.1 Overlap / Propensity Score Check

Since this is an RCT, the propensity scores should be approximately constant across individuals and the distributions for treated and control groups should overlap nearly perfectly. This is a basic sanity check — any separation would signal problems with the randomization.

overlap_df <- data.frame(
  W_hat = W_hat,
  Treatment = factor(W, labels = c("Control", "Treated"))
)

ggplot(overlap_df, aes(x = W_hat, fill = Treatment)) +
  geom_density(alpha = 0.5) +
  geom_vline(xintercept = mean(W), lty = 2, col = "black") +
  labs(title = "Propensity Score Distribution by Treatment Status",
       subtitle = "RCT: distributions should overlap almost perfectly",
       x = "Estimated Propensity Score (W.hat)", y = "Density") +
  theme_minimal() +
  scale_fill_brewer(palette = "Set1")

cat("Propensity score range (Control):", 
    round(range(W_hat[W == 0]), 4), "\n")

## Propensity score range (Control): 0.6157 0.8293

cat("Propensity score range (Treated):", 
    round(range(W_hat[W == 1]), 4), "\n")

## Propensity score range (Treated): 0.6114 0.8387

cat("Mean propensity score (Control):", 
    round(mean(W_hat[W == 0]), 4), "\n")

## Mean propensity score (Control): 0.7117

cat("Mean propensity score (Treated):", 
    round(mean(W_hat[W == 1]), 4), "\n")

## Mean propensity score (Treated): 0.7211

The propensity scores should cluster tightly around 0.719 for both groups. Any meaningful separation would be a red flag, but in this RCT context we expect near-perfect overlap, confirming that the orthogonalization with W.hat is appropriate.

5.2 Placebo Test (Permutation of Treatment)

We randomly shuffle the treatment assignment and re-estimate the causal forest for physical/sexual violence. Under the null of no treatment effect and no heterogeneity, the resulting CATEs should collapse to near zero. If the placebo forest still finds large treatment effects or heterogeneity, it would suggest our main results are driven by noise.

set.seed(42)

# Shuffle treatment assignment
W_placebo <- sample(W)

# Re-estimate propensity and outcome forests with placebo W
rf_W_placebo <- regression_forest(X, W_placebo, tune.parameters = "all")
W_hat_placebo <- predict(rf_W_placebo)$predictions

cf_placebo <- causal_forest(X[, selected_phys], Y, W_placebo,
                             W.hat = W_hat_placebo,
                             Y.hat = Y_hat_phys,
                             num.trees = 4000,
                             tune.parameters = "all")

ate_placebo <- average_treatment_effect(cf_placebo, target.sample = "all")
cate_placebo <- predict(cf_placebo, X[, selected_phys])$predictions

# Spec C placebo
cf_placebo_obs <- causal_forest(X_pol_obs[, selected_phys_obs], Y, W_placebo,
                                 W.hat = W_hat_placebo,
                                 Y.hat = Y_hat_phys_obs,
                                 num.trees = 4000,
                                 tune.parameters = "all")
ate_placebo_obs <- average_treatment_effect(cf_placebo_obs, target.sample = "all")
cate_placebo_obs <- predict(cf_placebo_obs, X_pol_obs[, selected_phys_obs])$predictions

cat("=== Placebo Test (Spec A) ===\n")

## === Placebo Test (Spec A) ===

cat("Placebo ATE:", round(ate_placebo[1], 4), " SE:", round(ate_placebo[2], 4), "\n")

## Placebo ATE: 0.0097  SE: 0.022

cat("Placebo CATE range:", round(range(cate_placebo), 4), "\n")

## Placebo CATE range: 0.0096 0.0096

cat("Placebo CATE SD:", round(sd(cate_placebo), 4), "\n\n")

## Placebo CATE SD: 0

cat("=== Placebo Test (Spec C) ===\n")

## === Placebo Test (Spec C) ===

cat("Placebo ATE:", round(ate_placebo_obs[1], 4), " SE:", round(ate_placebo_obs[2], 4), "\n")

## Placebo ATE: 0.0162  SE: 0.0235

cat("Placebo CATE range:", round(range(cate_placebo_obs), 4), "\n")

## Placebo CATE range: 0.0148 0.0167

cat("Placebo CATE SD:", round(sd(cate_placebo_obs), 4), "\n\n")

## Placebo CATE SD: 3e-04

# Calibration test on placebo
cat("Placebo calibration test (Spec A):\n")

## Placebo calibration test (Spec A):

print(test_calibration(cf_placebo))

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                 Estimate Std. Error  t value Pr(>t)
## mean.forest.prediction            1.9065     1.5311   1.2452 0.1067
## differential.forest.prediction -426.8302    14.9996 -28.4560 1.0000

cat("\nPlacebo calibration test (Spec C):\n")

## 
## Placebo calibration test (Spec C):

print(test_calibration(cf_placebo_obs))

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                 Estimate Std. Error  t value  Pr(>t)  
## mean.forest.prediction            1.6828     1.0053   1.6738 0.04721 *
## differential.forest.prediction -414.8744    14.6794 -28.2624 1.00000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Compare CATE distributions
par(mfrow = c(1, 3))
d_real <- density(oob_cate_phys)
d_plac <- density(cate_placebo)
d_plac_obs <- density(cate_placebo_obs)
xlims <- range(c(d_real$x, d_plac$x, d_plac_obs$x))

plot(d_real, main = "Real Treatment CATEs (Spec A)",
     xlab = "Treatment Effect", xlim = xlims)
abline(v = 0, col = "blue", lty = 3)
abline(v = mean(oob_cate_phys), col = "red", lty = 3)

plot(d_plac, main = "Placebo CATEs (Spec A, Shuffled W)",
     xlab = "Treatment Effect", xlim = xlims)
abline(v = 0, col = "blue", lty = 3)
abline(v = mean(cate_placebo), col = "red", lty = 3)

plot(d_plac_obs, main = "Placebo CATEs (Spec C, Shuffled W)",
     xlab = "Treatment Effect", xlim = xlims)
abline(v = 0, col = "blue", lty = 3)
abline(v = mean(cate_placebo_obs), col = "red", lty = 3)

par(mfrow = c(1, 1))

If the pipeline is working correctly, the placebo ATE should be close to zero (within noise), the CATE distribution should be much tighter than the real one, and the calibration test should show no significance. This confirms that our main findings are not artefacts of the forest overfitting.

5.3 Full vs Selected Covariates Comparison

We compare our refined causal forest (with pre-selected covariates and orthogonalization) against a baseline forest using all 19 covariates without orthogonalization. If the results are qualitatively similar, it increases confidence. If the refined pipeline shows tighter estimates or better calibration, it demonstrates the value of the two-step approach.

set.seed(100241)

# Baseline causal forest: all covariates, no orthogonalization
cf_full <- causal_forest(X, Y, W,
                          num.trees = 4000,
                          sample.fraction = 0.5,
                          mtry = min(ceiling(sqrt(ncol(X)) + 20), ncol(X)),
                          min.node.size = 20,
                          honesty = TRUE,
                          honesty.fraction = 0.5,
                          honesty.prune.leaves = TRUE,
                          tune.parameters = "none",
                          compute.oob.predictions = TRUE)

ate_full <- average_treatment_effect(cf_full, target.sample = "all")
cate_full <- predict(cf_full)$predictions  # OOB predictions

cat("=== Physical/Sexual Violence ===\n")

## === Physical/Sexual Violence ===

cat("Full covariates ATE:", round(ate_full[1], 4), " SE:", round(ate_full[2], 4), "\n")

## Full covariates ATE: -0.0583  SE: 0.0238

cat("Selected covariates (Spec A) ATE:", round(ate_all[1], 4), " SE:", round(ate_all[2], 4), "\n")

## Selected covariates (Spec A) ATE: -0.0577  SE: 0.0239

cat("Observable covariates (Spec C) ATE:", round(ate_all_obs[1], 4), " SE:", round(ate_all_obs[2], 4), "\n\n")

## Observable covariates (Spec C) ATE: -0.0487  SE: 0.0252

# Calibration comparison
cat("Calibration test — Full covariates:\n")

## Calibration test — Full covariates:

print(test_calibration(cf_full))

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                Estimate Std. Error t value   Pr(>t)   
## mean.forest.prediction          0.97190    0.40534  2.3978 0.008322 **
## differential.forest.prediction -0.89774    1.40927 -0.6370 0.737884   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\nCalibration test — Selected covariates (Spec A):\n")

## 
## Calibration test — Selected covariates (Spec A):

print(test_calibration(cf))

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                  Estimate Std. Error  t value    Pr(>t)    
## mean.forest.prediction            0.99996    0.23897   4.1845 1.531e-05 ***
## differential.forest.prediction -419.90640   12.83985 -32.7034         1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

cat("\nCalibration test — Observable covariates (Spec C):\n")

## 
## Calibration test — Observable covariates (Spec C):

print(test_calibration(cf_phys_obs))

## 
## Best linear fit using forest predictions (on held-out data)
## as well as the mean forest prediction as regressors, along
## with one-sided heteroskedasticity-robust (HC3) SEs:
## 
##                                  Estimate Std. Error  t value  Pr(>t)  
## mean.forest.prediction            0.60326    0.30567   1.9736 0.02433 *
## differential.forest.prediction -385.32525   14.41575 -26.7295 1.00000  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Compare CATE distributions
par(mfrow = c(1, 3))
d_full <- density(cate_full)
d_sel  <- density(oob_cate_phys)
cate_obs_oob <- predict(cf_phys_obs)$predictions
d_obs  <- density(cate_obs_oob)
xlims <- range(c(d_full$x, d_sel$x, d_obs$x))

plot(d_full, main = "CATEs: Full Covariates (19 vars)",
     xlab = "Treatment Effect", xlim = xlims)
abline(v = c(0, mean(cate_full)), col = c("blue", "red"), lty = 3)

plot(d_sel, main = paste0("CATEs: Spec A Selected (", length(selected_phys), " vars)"),
     xlab = "Treatment Effect", xlim = xlims)
abline(v = c(0, mean(oob_cate_phys)), col = c("blue", "red"), lty = 3)

plot(d_obs, main = paste0("CATEs: Spec C Observable (", length(selected_phys_obs), " vars)"),
     xlab = "Treatment Effect", xlim = xlims)
abline(v = c(0, mean(cate_obs_oob)), col = c("blue", "red"), lty = 3)

par(mfrow = c(1, 1))

comparison_robustness <- data.frame(
  Specification = c("Full covariates (baseline)", 
                     "Spec A: Selected + orthogonalized",
                     "Spec C: Observable + orthogonalized"),
  ATE = round(c(ate_full[1], ate_all[1], ate_all_obs[1]), 4),
  SE = round(c(ate_full[2], ate_all[2], ate_all_obs[2]), 4),
  CATE_SD = round(c(sd(cate_full), sd(oob_cate_phys), sd(cate_obs_oob)), 4),
  CATE_Range = c(
    paste0("[", round(min(cate_full), 4), ", ", round(max(cate_full), 4), "]"),
    paste0("[", round(min(oob_cate_phys), 4), ", ", round(max(oob_cate_phys), 4), "]"),
    paste0("[", round(min(cate_obs_oob), 4), ", ", round(max(cate_obs_oob), 4), "]")
  ),
  N_vars = c(ncol(X), length(selected_phys), length(selected_phys_obs))
)

knitr::kable(comparison_robustness, 
             caption = "Comparison: Full vs Selected vs Observable Covariates (Physical/Sexual Violence)")

Comparison: Full vs Selected vs Observable Covariates (Physical/Sexual Violence)
Specification	ATE	SE	CATE_SD	CATE_Range	N_vars
Full covariates (baseline)	-0.0583	0.0238	0.0173	[-0.0916, -0.0253]	19
Spec A: Selected + orthogonalized	-0.0577	0.0239	0.0009	[-0.0598, -0.0533]	10
Spec C: Observable + orthogonalized	-0.0487	0.0252	0.0010	[-0.0557, -0.0474]	11

The key things to look for: (1) ATEs should be similar across specifications — if they diverge substantially, one model may be misspecified. (2) The refined pipeline should ideally show a similar or tighter CATE distribution with better calibration test results, confirming that pre-selection and orthogonalization helped rather than hurt.

5.4 Selection Threshold Sensitivity

The default threshold for variable selection is 0.2 × mean importance. Here we test two alternative thresholds — a more permissive one (0.1×, keeps more variables) and a stricter one (0.5×, keeps fewer) — to check whether results are sensitive to this choice.

set.seed(100241)

thresholds <- c(0.1, 0.2, 0.5)
sensitivity_results <- data.frame()

for (thr in thresholds) {
  sel <- which(Y_vi_phys / mean(Y_vi_phys) > thr)
  n_sel <- length(sel)
  
  cf_sens <- causal_forest(X[, sel], Y, W,
                            W.hat = W_hat,
                            Y.hat = Y_hat_phys,
                            num.trees = 4000,
                            tune.parameters = "all")
  
  ate_sens <- average_treatment_effect(cf_sens, target.sample = "all")
  cate_sens <- predict(cf_sens, X[, sel])$predictions
  cal_sens <- test_calibration(cf_sens)
  
  sensitivity_results <- rbind(sensitivity_results, data.frame(
    Threshold = paste0(thr, "x"),
    N_vars = n_sel,
    Variables = paste(names(X)[sel], collapse = ", "),
    ATE = round(ate_sens[1], 4),
    SE = round(ate_sens[2], 4),
    CATE_SD = round(sd(cate_sens), 4),
    Cal_mean_p = round(cal_sens[1, 4], 4),
    Cal_diff_p = round(cal_sens[2, 4], 4)
  ))
}

knitr::kable(sensitivity_results %>% select(-Variables), 
             caption = "Sensitivity to Variable Selection Threshold (Physical/Sexual Violence)",
             col.names = c("Threshold", "# Vars", "ATE", "SE", "CATE SD", 
                           "Calibration (mean) p", "Calibration (diff) p"))

Sensitivity to Variable Selection Threshold (Physical/Sexual Violence)
	Threshold	# Vars	ATE	SE	CATE SD	Calibration (mean) p	Calibration (diff) p
estimate	0.1x	14	-0.0578	0.0239	5e-04	0.0025	1
estimate1	0.2x	10	-0.0577	0.0239	0e+00	0.0004	1
estimate2	0.5x	6	-0.0577	0.0239	0e+00	0.0000	1

cat("Variables selected at each threshold:\n\n")

## Variables selected at each threshold:

for (i in 1:nrow(sensitivity_results)) {
  cat(sensitivity_results$Threshold[i], "(", sensitivity_results$N_vars[i], "vars):",
      sensitivity_results$Variables[i], "\n\n")
}

## 0.1x ( 14 vars): bl_age, bl_edu_sec, bl_married, bl_dempl, bl_partn_sedu, bl_partn_age, bl_nchildren0t5, bl_nchildren6t14, bl_asset_index, norooms, bl_carchi, bl_physical, bl_emotional, bl_control 
## 
## 0.2x ( 10 vars): bl_age, bl_married, bl_partn_age, bl_nchildren0t5, bl_nchildren6t14, bl_asset_index, norooms, bl_physical, bl_emotional, bl_control 
## 
## 0.5x ( 6 vars): bl_age, bl_partn_age, bl_asset_index, bl_physical, bl_emotional, bl_control

If the ATE is stable across thresholds but the calibration test improves at one particular threshold, that gives guidance on the optimal level of pre-selection. Large swings in ATE would suggest the results are sensitive and should be interpreted cautiously.

5.5 Bootstrap ATE Confidence Intervals

The average_treatment_effect function provides asymptotic standard errors. Here we complement these with bootstrap confidence intervals by re-estimating the causal forest on B bootstrap samples and collecting the ATE distribution.

set.seed(12345)
B <- 200  # number of bootstrap replications

boot_ate_phys <- numeric(B)
boot_ate_ctrl <- numeric(B)
boot_ate_emot <- numeric(B)
n <- nrow(cf_data)  # re-stated explicitly; cf_data is unchanged from cv-setup

for (b in 1:B) {
  idx <- sample(1:n, n, replace = TRUE)
  X_b <- X[idx, ]
  Y_b <- Y[idx]
  W_b <- W[idx]
  Y_ctrl_b <- Y_ctrl[idx]
  Y_emot_b <- Y_emot[idx]
  W_hat_b <- W_hat[idx]
  Y_hat_phys_b <- Y_hat_phys[idx]
  Y_hat_ctrl_b <- Y_hat_ctrl[idx]
  Y_hat_emot_b <- Y_hat_emot[idx]
  
  cf_b_phys <- causal_forest(X_b[, selected_phys], Y_b, W_b,
                              W.hat = W_hat_b, Y.hat = Y_hat_phys_b,
                              num.trees = 2000, tune.parameters = "none")
  boot_ate_phys[b] <- average_treatment_effect(cf_b_phys, target.sample = "all")[1]
  
  cf_b_ctrl <- causal_forest(X_b[, selected_ctrl], Y_ctrl_b, W_b,
                              W.hat = W_hat_b, Y.hat = Y_hat_ctrl_b,
                              num.trees = 2000, tune.parameters = "none")
  boot_ate_ctrl[b] <- average_treatment_effect(cf_b_ctrl, target.sample = "all")[1]
  
  cf_b_emot <- causal_forest(X_b[, selected_emot], Y_emot_b, W_b,
                              W.hat = W_hat_b, Y.hat = Y_hat_emot_b,
                              num.trees = 2000, tune.parameters = "none")
  boot_ate_emot[b] <- average_treatment_effect(cf_b_emot, target.sample = "all")[1]
}

# Summary table
boot_table <- data.frame(
  Outcome = c("Physical/Sexual", "Controlling", "Emotional"),
  Point_ATE = round(c(ate_all[1], ate_ctrl[1], ate_emot[1]), 4),
  Asymptotic_SE = round(c(ate_all[2], ate_ctrl[2], ate_emot[2]), 4),
  Boot_SE = round(c(sd(boot_ate_phys), sd(boot_ate_ctrl), sd(boot_ate_emot)), 4),
  Boot_CI_lower = round(c(
    quantile(boot_ate_phys, 0.025),
    quantile(boot_ate_ctrl, 0.025),
    quantile(boot_ate_emot, 0.025)), 4),
  Boot_CI_upper = round(c(
    quantile(boot_ate_phys, 0.975),
    quantile(boot_ate_ctrl, 0.975),
    quantile(boot_ate_emot, 0.975)), 4)
)

knitr::kable(boot_table,
             col.names = c("Outcome", "Point ATE", "Asymptotic SE", "Bootstrap SE",
                           "Boot 2.5%", "Boot 97.5%"),
             caption = "Bootstrap vs Asymptotic ATE Inference (B = 200)")

Bootstrap vs Asymptotic ATE Inference (B = 200)
Outcome	Point ATE	Asymptotic SE	Bootstrap SE	Boot 2.5%	Boot 97.5%
Physical/Sexual	-0.0577	0.0239	0.0238	-0.0998	-0.0065
Controlling	-0.0697	0.0293	0.0278	-0.1258	-0.0229
Emotional	-0.0464	0.0287	0.0276	-0.0972	0.0081

par(mfrow = c(1, 3))

hist(boot_ate_phys, breaks = 30, main = "Bootstrap ATEs: Physical/Sexual",
     xlab = "ATE", col = "steelblue", border = "white")
abline(v = ate_all[1], col = "red", lwd = 2)
abline(v = 0, col = "blue", lty = 3)

hist(boot_ate_ctrl, breaks = 30, main = "Bootstrap ATEs: Controlling",
     xlab = "ATE", col = "steelblue", border = "white")
abline(v = ate_ctrl[1], col = "red", lwd = 2)
abline(v = 0, col = "blue", lty = 3)

hist(boot_ate_emot, breaks = 30, main = "Bootstrap ATEs: Emotional",
     xlab = "ATE", col = "steelblue", border = "white")
abline(v = ate_emot[1], col = "red", lwd = 2)
abline(v = 0, col = "blue", lty = 3)

par(mfrow = c(1, 1))

If the bootstrap SEs are similar to the asymptotic SEs, this confirms that the asymptotic theory works well in this sample. If the bootstrap CIs are wider, it suggests the asymptotic SEs may be too optimistic. The red line shows the point estimate and the blue dashed line marks zero — if the bulk of the bootstrap distribution falls to one side of zero, the treatment effect is robust.

Summary & Methodological Notes

Athey/Wager Two-Step Pipeline

This analysis follows the recommended pipeline from Athey & Wager:

Step 1 — Propensity Forest: Confirms randomization by showing no covariate strongly predicts treatment assignment. Generates W.hat for orthogonalization.
Step 2 — Outcome Regression Forests: Identifies which covariates are strong predictors of each IPV outcome. Generates Y.hat for orthogonalization and selects variables above the 0.2 × mean importance threshold.
Step 3 — Refined Causal Forests: Uses selected covariates plus W.hat and Y.hat for orthogonalization, improving the signal-to-noise ratio for treatment effect heterogeneity detection.

Three Specifications

Specification	Description	Covariates Excluded
A	Full covariates	None
B	Policy-relevant (keep baseline same outcome)	Cross-outcome baseline IPV
C	Purely observable targeting	All baseline IPV (bl_physical, bl_emotional, bl_control)

Specification C represents the most policy-realistic scenario: policymakers only have access to observable demographic and socioeconomic characteristics for targeting, with no confidential IPV survey data at all.

Key Findings

Married women seem to benefit less than women in informal unions — possibly reflecting differences in bargaining dynamics within relationships.
Indigenous women tend to benefit more, especially for controlling behaviors — a potentially important finding for targeting.
Having more young children (0-5) dampens the treatment effect across multiple outcomes.
Province effects (Carchi vs Sucumbíos) may moderate treatment effectiveness.

Robustness Summary

Five robustness checks were conducted:

Overlap check (5.1): Propensity scores should be tightly clustered for both groups, confirming the RCT design and validating the use of W.hat orthogonalization.
Placebo test (5.2): Shuffling treatment labels should collapse ATEs and CATEs to near zero with no significant calibration, confirming results are not artefacts.
Full vs selected covariates (5.3): Comparing the refined pipeline against a baseline forest with all covariates. Similar ATEs increase confidence; better calibration from the refined pipeline validates the two-step approach.
Threshold sensitivity (5.4): ATE stability across selection thresholds (0.1×, 0.2×, 0.5×) confirms results are not driven by an arbitrary variable selection cutoff.
Bootstrap CIs (5.5): 200-replicate bootstrap provides non-parametric confidence intervals for comparison against asymptotic SEs.

Replication & Causal Forest Extension: Hidrobo, Peterman & Heise (2016)

The Effect of Cash, Vouchers, and Food Transfers on Intimate Partner Violence

Cedric Kopp

2026-05-16