For this project, I created a love plot to assess the covariate balance between treatment and control groups in a sample dataset (synthetic) called “reiner”. The goal was to get the absolute mean differences below 0.2 for all variables in the table. As the plot shows, the matching was successful in achieving covariate balance on the two problem variables, age and income.
Code can be found below.
library(cobalt)
library(MatchIt)
library(WeightIt)
library(optmatch)
library(tidyverse)
# Set the random seed for reproducibility
set.seed(123)
# Set the number of observations
n <- 5000
# Create data frame
reiner <- tibble(
treatment = rbinom(n, 1, 0.5), # binary treatment
age = rnorm(n, 50, 10), # normally distributed age
gender = rbinom(n, 1, 0.5), # binary gender
income = rnorm(n, 50000, 10000), # normally distributed income
)
# Adjust the confounding variables based on the treatment assignment
reiner$age[reiner$treatment == 1] <- reiner$age[reiner$treatment == 1] + 10
reiner$income[reiner$treatment == 1] <- reiner$income[reiner$treatment == 1] + 5000
# Create an outcome that is influenced by the treatment and some confounding variables.
reiner$outcome <- with(reiner, treatment * 5 + age * 0.2 + gender * 3 + rnorm(n, 0, 5))
# Add some noise variables
for (i in 1:10) {
reiner[[paste0("noise", i)]] <- rnorm(n, 0, 1)
}
love.plot(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10,
data = reiner, abs = T, thresholds = 0.2)
# matching
m.out <- matchit(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10, data = reiner, method = "nearest", exact = ~gender)
love.plot(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10,
data = reiner, weights = list(PSM = m.out), abs = T, thresholds = 0.2) # not working that well
# caliper matchin
m.mahal.out <-matchit(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10, data=data, method = "nearest", caliper = 0.1, mahvars = ~age + income)
love.plot(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10,
data = reiner, weights = list(PSM = m.mahal.out), abs = T, thresholds = 0.2) # not working well
# full match
m.out.full <- matchit(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10, data = reiner, method = "full", estimand = "ATT")
love.plot(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10,
data = reiner, weights = list(full = m.out.full), abs = T, thresholds = 0.2)
summary(m.out.full) # worked well!
# weighting
m.weight <- weightit(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10, data = reiner, method = "ps", estimand = "ATT")
love.plot(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10,
data = reiner, weights = list(PSW = m.weight), abs = T, thresholds = 0.2) # this worked the best
### now do regression
# unadjusted regression results
summary(lm(outcome ~ treatment + age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10, data = reiner))
# matched regression results
matched <- match.data(m.out.full, data = reiner)
model1 = lm(outcome ~ treatment + age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10, data = matched, weights = weights)
coeftest(model1, vcov. = vcovCL, cluster = ~subclass)
### visualizing for display
love.plot(treatment ~ age + gender + income + noise1 + noise2 + noise3 + noise4 + noise5 + noise6 + noise7 + noise8 + noise9 + noise10,
data = reiner, weights = list(PSW = m.weight), abs = T, thresholds = 0.2, sample.names = c("unmatched", "matched")) + ggtitle("matched vs unmatched covariate balance")