Import required packages,load data, define auxiliary functions.
library(flexdashboard)
library(MatchIt)
library(WeightIt)
library(cobalt)
load("~/MWR_course/exercises/exercise_data.RData")
n <-names(d)
vars <- paste(n[!n %in% c("treat","re78")],collapse = " + ")
vars2 <- paste(vars,"+ I(age^2) + I(educ^2) + I(re74^2) + I(re75^2)")
f2 <- as.formula(paste("treat ~",vars2))
love_plot <- function(x) {
love.plot(x,
binary = "std" ,
continuous = "std" ,
abs = TRUE ,
stats = c("m", "ks") ,
s.d.denom = "treat",
line = TRUE ,
var.order = "adj" ,
thresholds = c(.10, .05))
}
est_fun <- function(data, index) {
m_out <- matchit(as.formula(f2),
data = data[index,],
replace = TRUE)
fit <- lm(re78 ~ treat,
data = data[index,],
weights = m_out$weights)
return(coef(fit)["mbsmoke"])
}Ex 5. Conduct 1:1 nearest-neighbor matching on the log odds of the propensity score. Use bal.plot() o compare the overall propensity score distributions. Do once without replacement and once with replacement. Why do you think there’s a difference? Try to figure it out. Estimate the ATT for each assumption (i.e., with or without replacement). If you achieve good overall balance on the propensity score, try checking individual covariate balance using love.plot().
set.seed(123)
match_out_noreplace <- matchit(formula = f2,
data=d,
distance = "linear.logit",
m.order = "largest",
replace = F)
match_out_noreplace
Call:
matchit(formula = f2, data = d, distance = "linear.logit", m.order = "largest",
replace = F)
Sample sizes:
Control Treated
All 2213 297
Matched 297 297
Unmatched 1916 0
Discarded 0 0
Call:
lm(formula = re78 ~ treat, data = d, weights = match_out_noreplace$weights)
Coefficients:
(Intercept) treat
9.805 -3.829
match_out_replace <- matchit(formula = f2,
data=d,
distance = "linear.logit",
m.order = "largest",
replace = T)
match_out_replace
Call:
matchit(formula = f2, data = d, distance = "linear.logit", m.order = "largest",
replace = T)
Sample sizes:
Control Treated
All 2213 297
Matched 89 297
Unmatched 2124 0
Discarded 0 0
Call:
lm(formula = re78 ~ treat, data = d, weights = match_out_replace$weights)
Coefficients:
(Intercept) treat
6.1595 -0.1832
Summary of weights
- Weight ranges:
Min Max
treated 1 || 1.0000
control 0 |---------------------------| 83.9742
- Units with 5 greatest weights by group:
2214 2215 2216 2218 2219
treated 1 1 1 1 1
1485 1521 1484 1478 56
control 22.7435 22.7435 27.3148 48.3254 83.9742
- Weight statistics:
Coef of Var MAD Entropy
treated 0.000 0.000 0.000
control 12.211 1.732 4877.368
overall 7.747 1.572 3273.499
- Effective Sample Sizes:
Control Treated
Unweighted 2213.000 297
Weighted 14.749 297
Estimate the ATT.
Call:
lm(formula = re78 ~ treat, data = d, weights = W1$weights)
Coefficients:
(Intercept) treat
4.837 1.139
Check for covariate balance.
Ex 7. Now do the same as above using covariate balancing propensity scores.
Summary of weights
- Weight ranges:
Min Max
treated 1 || 1.0000
control 0 |---------------------------| 20.9755
- Units with 5 greatest weights by group:
2214 2215 2217 2218 2219
treated 1 1 1 1 1
1521 1484 1485 1478 56
control 11.7352 12.8239 12.8239 15.3359 20.9755
- Weight statistics:
Coef of Var MAD Entropy
treated 0.000 0.000 0.000
control 6.910 1.624 1845.978
overall 3.789 1.515 1020.622
- Effective Sample Sizes:
Control Treated
Unweighted 2213.000 297
Weighted 45.418 297
Estimate the ATT.
Call:
lm(formula = re78 ~ treat, data = d, weights = W1$weights)
Coefficients:
(Intercept) treat
5.87859 0.09776
Check for covariate balance.