Import required packages,load data, define auxiliary functions.

library(flexdashboard)
library(MatchIt)
library(WeightIt)
library(cobalt)
load("~/MWR_course/exercises/exercise_data.RData")
n <-names(d)
vars <- paste(n[!n %in% c("treat","re78")],collapse = " + ")
vars2 <- paste(vars,"+ I(age^2) + I(educ^2) + I(re74^2) + I(re75^2)")
f2 <- as.formula(paste("treat ~",vars2))

love_plot <- function(x) {
  love.plot(x,
            binary = "std" ,
            continuous = "std" ,
            abs = TRUE ,
            stats = c("m", "ks") ,
            s.d.denom = "treat",
            line = TRUE ,
            var.order = "adj" ,
            thresholds = c(.10, .05))
}
est_fun <- function(data, index) {
  m_out <- matchit(as.formula(f2),
                   data = data[index,],
                   replace = TRUE)
  fit <- lm(re78 ~ treat,
            data = data[index,],
            weights = m_out$weights)
  return(coef(fit)["mbsmoke"])
}

Ex 5. Conduct 1:1 nearest-neighbor matching on the log odds of the propensity score. Use bal.plot() o compare the overall propensity score distributions. Do once without replacement and once with replacement. Why do you think there’s a difference? Try to figure it out. Estimate the ATT for each assumption (i.e., with or without replacement). If you achieve good overall balance on the propensity score, try checking individual covariate balance using love.plot().

No replacement

set.seed(123)
match_out_noreplace <- matchit(formula = f2, 
                     data=d, 
                     distance = "linear.logit",
                     m.order = "largest",
                     replace = F)
match_out_noreplace

Call: 
matchit(formula = f2, data = d, distance = "linear.logit", m.order = "largest", 
    replace = F)

Sample sizes:
          Control Treated
All          2213     297
Matched       297     297
Unmatched    1916       0
Discarded       0       0
lm(re78~treat,data=d,weights=match_out_noreplace$weights)

Call:
lm(formula = re78 ~ treat, data = d, weights = match_out_noreplace$weights)

Coefficients:
(Intercept)        treat  
      9.805       -3.829  
plot(match_out_noreplace,type="jitter",interactive=F)
love_plot(match_out_noreplace)

Replacement

match_out_replace <- matchit(formula = f2, 
                     data=d, 
                     distance = "linear.logit",
                     m.order = "largest",
                     replace = T)
match_out_replace

Call: 
matchit(formula = f2, data = d, distance = "linear.logit", m.order = "largest", 
    replace = T)

Sample sizes:
          Control Treated
All          2213     297
Matched        89     297
Unmatched    2124       0
Discarded       0       0
lm(re78~treat,data=d,weights=match_out_replace$weights)

Call:
lm(formula = re78 ~ treat, data = d, weights = match_out_replace$weights)

Coefficients:
(Intercept)        treat  
     6.1595      -0.1832  
plot(match_out_replace,type="jitter",interactive=F)
love_plot(match_out_replace)


Ex 6. Estimate propensity scores and ATT weights using weightit(). Ignore the warning you get. We’ll discuss that more in class.

W1 <- weightit(formula = f2,
               data=d,
               method= "ps",
               estimand = "ATT")
summary(W1)
                 Summary of weights

- Weight ranges:

        Min                                   Max
treated   1 ||                             1.0000
control   0 |---------------------------| 83.9742

- Units with 5 greatest weights by group:
                                                
            2214    2215    2216    2218    2219
 treated       1       1       1       1       1
            1485    1521    1484    1478      56
 control 22.7435 22.7435 27.3148 48.3254 83.9742

- Weight statistics:

        Coef of Var   MAD  Entropy
treated       0.000 0.000    0.000
control      12.211 1.732 4877.368
overall       7.747 1.572 3273.499

- Effective Sample Sizes:

            Control Treated
Unweighted 2213.000     297
Weighted     14.749     297

Estimate the ATT.

lm(re78 ~ treat, data=d, weights = W1$weights)

Call:
lm(formula = re78 ~ treat, data = d, weights = W1$weights)

Coefficients:
(Intercept)        treat  
      4.837        1.139  

Check for covariate balance.

Ex 7. Now do the same as above using covariate balancing propensity scores.

W1 <- weightit(formula = f2,
               data=d,
               method= "cbps",
               over= T,
               estimand = "ATT")
summary(W1)
                 Summary of weights

- Weight ranges:

        Min                                   Max
treated   1  ||                            1.0000
control   0 |---------------------------| 20.9755

- Units with 5 greatest weights by group:
                                                
            2214    2215    2217    2218    2219
 treated       1       1       1       1       1
            1521    1484    1485    1478      56
 control 11.7352 12.8239 12.8239 15.3359 20.9755

- Weight statistics:

        Coef of Var   MAD  Entropy
treated       0.000 0.000    0.000
control       6.910 1.624 1845.978
overall       3.789 1.515 1020.622

- Effective Sample Sizes:

            Control Treated
Unweighted 2213.000     297
Weighted     45.418     297

Estimate the ATT.

lm(re78 ~ treat, data=d, weights = W1$weights)

Call:
lm(formula = re78 ~ treat, data = d, weights = W1$weights)

Coefficients:
(Intercept)        treat  
    5.87859      0.09776  

Check for covariate balance.