Synthetic Difference in Differences – `synthdid` R package

# Global knitr chunk options applied to every code chunk in this document.
knitr::opts_chunk$set(
  # What is shown: echo the source code, hide warnings and messages
  echo = TRUE,
  warning = FALSE,
  message = FALSE,
  # How chunks are processed: reformat code with styler and cache results
  tidy = "styler",
  cache = TRUE,
  # Figure output: width 8, height 5, rendered at 300 dpi
  fig.width = 8,
  fig.height = 5,
  dpi = 300
)

Import Packages

Check: synthdid 📦 R package tutorial.

# tidyverse is attached for the pipe (`%>%`) and ggplot2's `labs()` used below
library(tidyverse)
# One-time install of synthdid from GitHub (left commented out):
# devtools::install_github("synth-inference/synthdid")
library(synthdid)
# Fix the RNG seed so any randomized steps later in the document are reproducible
# NOTE(review): presumably this targets the "placebo" standard errors -- confirm
set.seed(12345)

Import Data

# Load the `california_prop99` example dataset into the workspace
data(california_prop99)

1. Data Requirement

The data should be a “long” panel with columns: unit, time, outcome, treatment.
The treatment indicator is 0 before treatment and 1 afterwards for treated units (and always 0 for control units).
SDID expects a balanced panel and simultaneous adoption (not staggered adoption) for treated units.

# Preview the first rows: one row per State-Year pair, i.e. "long" format
california_prop99 %>% head()

##         State Year PacksPerCapita treated
## 1     Alabama 1970           89.8       0
## 2    Arkansas 1970          100.3       0
## 3    Colorado 1970          124.8       0
## 4 Connecticut 1970          120.0       0
## 5    Delaware 1970          155.0       0
## 6     Georgia 1970          109.9       0

# Preview the last rows; note that the treated unit comes last
california_prop99 %>% tail()

##              State Year PacksPerCapita treated
## 1204       Vermont 2000           88.9       0
## 1205      Virginia 2000           96.7       0
## 1206 West Virginia 2000          107.9       0
## 1207     Wisconsin 2000           80.1       0
## 1208       Wyoming 2000           90.5       0
## 1209    California 2000           41.6       1

Convert to matrix format:

# Reshape the long panel into the matrix inputs used by the estimators
setup <- california_prop99 %>%
  panel.matrices(
    unit = "State",
    time = "Year",
    outcome = "PacksPerCapita",
    treatment = "treated", # binary indicator
    treated.last = TRUE
  )

# Inspect the result of `panel.matrices`

# Dimension of the outcome matrix: N x T (all units by all time periods)
setup$Y %>% dim()

## [1] 39 31

# Outcome matrix preview: rows are units, columns are time periods
head(setup$Y)

##              1970  1971  1972  1973  1974  1975  1976  1977  1978  1979  1980
## Alabama      89.8  95.4 101.1 102.9 108.2 111.7 116.2 117.1 123.0 121.4 123.2
## Arkansas    100.3 104.1 103.9 108.0 109.7 114.8 119.1 122.6 127.3 126.5 131.8
## Colorado    124.8 125.5 134.3 137.9 132.8 131.0 134.2 132.0 129.2 131.5 131.0
## Connecticut 120.0 117.6 110.8 109.3 112.4 110.2 113.4 117.3 117.5 117.4 118.0
## Delaware    155.0 161.1 156.3 154.7 151.3 147.6 153.0 153.3 155.5 150.2 150.5
## Georgia     109.9 115.7 117.0 119.8 123.7 122.9 125.9 127.9 130.6 131.0 134.0
##              1981  1982  1983  1984  1985  1986  1987  1988  1989  1990  1991
## Alabama     119.6 119.1 116.3 113.0 114.5 116.3 114.0 112.1 105.6 108.6 107.9
## Arkansas    128.7 127.4 128.0 123.1 125.8 126.0 122.3 121.5 118.3 113.1 116.8
## Colorado    133.8 130.5 125.3 119.7 112.4 109.9 102.4  94.6  88.8  87.4  90.2
## Connecticut 116.4 114.7 114.1 112.5 111.0 108.5 109.0 104.8 100.6  91.5  86.7
## Delaware    152.6 154.1 149.6 144.0 144.5 142.4 141.0 137.1 131.7 127.2 118.8
## Georgia     131.7 131.2 128.6 126.3 128.8 129.0 129.3 124.1 117.1 113.8 109.6
##              1992  1993  1994  1995  1996  1997  1998  1999  2000
## Alabama     109.1 108.5 107.1 102.6 101.4 104.9 106.2 100.7  96.2
## Arkansas    126.0 113.8 108.8 113.0 110.7 108.7 109.5 104.8  99.4
## Colorado     88.3  88.6  89.1  85.4  83.1  81.3  81.2  79.6  73.0
## Connecticut  83.5  79.1  76.6  79.3  76.0  75.9  75.5  73.4  71.4
## Delaware    120.0 123.8 126.1 127.2 128.3 124.1 132.8 139.5 140.7
## Georgia     109.2 109.2 107.8 100.3 102.7 100.6 100.5  97.1  88.4

setup[["N0"]] # N0: number of control units

## [1] 38

setup[["T0"]] # T0: number of pre-treatment time periods

## [1] 19

head(setup$W) # W: matrix of treatment indicators (first rows)

##             1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982
## Alabama        0    0    0    0    0    0    0    0    0    0    0    0    0
## Arkansas       0    0    0    0    0    0    0    0    0    0    0    0    0
## Colorado       0    0    0    0    0    0    0    0    0    0    0    0    0
## Connecticut    0    0    0    0    0    0    0    0    0    0    0    0    0
## Delaware       0    0    0    0    0    0    0    0    0    0    0    0    0
## Georgia        0    0    0    0    0    0    0    0    0    0    0    0    0
##             1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995
## Alabama        0    0    0    0    0    0    0    0    0    0    0    0    0
## Arkansas       0    0    0    0    0    0    0    0    0    0    0    0    0
## Colorado       0    0    0    0    0    0    0    0    0    0    0    0    0
## Connecticut    0    0    0    0    0    0    0    0    0    0    0    0    0
## Delaware       0    0    0    0    0    0    0    0    0    0    0    0    0
## Georgia        0    0    0    0    0    0    0    0    0    0    0    0    0
##             1996 1997 1998 1999 2000
## Alabama        0    0    0    0    0
## Arkansas       0    0    0    0    0
## Colorado       0    0    0    0    0
## Connecticut    0    0    0    0    0
## Delaware       0    0    0    0    0
## Georgia        0    0    0    0    0

# Last rows of W, which include the treated unit
setup$W %>% tail()

##               1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982
## Vermont          0    0    0    0    0    0    0    0    0    0    0    0    0
## Virginia         0    0    0    0    0    0    0    0    0    0    0    0    0
## West Virginia    0    0    0    0    0    0    0    0    0    0    0    0    0
## Wisconsin        0    0    0    0    0    0    0    0    0    0    0    0    0
## Wyoming          0    0    0    0    0    0    0    0    0    0    0    0    0
## California       0    0    0    0    0    0    0    0    0    0    0    0    0
##               1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995
## Vermont          0    0    0    0    0    0    0    0    0    0    0    0    0
## Virginia         0    0    0    0    0    0    0    0    0    0    0    0    0
## West Virginia    0    0    0    0    0    0    0    0    0    0    0    0    0
## Wisconsin        0    0    0    0    0    0    0    0    0    0    0    0    0
## Wyoming          0    0    0    0    0    0    0    0    0    0    0    0    0
## California       0    0    0    0    0    0    1    1    1    1    1    1    1
##               1996 1997 1998 1999 2000
## Vermont          0    0    0    0    0
## Virginia         0    0    0    0    0
## West Virginia    0    0    0    0    0
## Wisconsin        0    0    0    0    0
## Wyoming          0    0    0    0    0
## California       1    1    1    1    1

# Overview of the components stored in `setup` (Y, N0, T0, W)
setup %>% summary()

##    Length Class  Mode   
## Y  1209   -none- numeric
## N0    1   -none- numeric
## T0    1   -none- numeric
## W  1209   -none- numeric

2. Estimation

The main function is synthdid_estimate().

# Fit the synthetic diff-in-diff estimator on the panel matrices; arguments
# are passed positionally in the order Y, N0, T0
tau.hat <- synthdid_estimate(setup$Y, setup$N0, setup$T0)

# Display the fitted estimate
tau.hat

## synthdid: -15.604 +- NA. Effective N0/N0 = 16.4/38~0.4. Effective T0/T0 = 2.8/19~0.1. N1,T1 = 1,12.

How does it work?

Algorithm 1 in the paper

Results

# Summarise the fit: point estimate, standard error, control-unit and
# time-period weights, and the (effective) panel dimensions
summary(object = tau.hat)

## $estimate
## [1] -15.60383
## 
## $se
##      [,1]
## [1,]   NA
## 
## $controls
##                estimate 1
## Nevada              0.124
## New Hampshire       0.105
## Connecticut         0.078
## Delaware            0.070
## Colorado            0.058
## Illinois            0.053
## Nebraska            0.048
## Montana             0.045
## Utah                0.042
## New Mexico          0.041
## Minnesota           0.039
## Wisconsin           0.037
## West Virginia       0.034
## North Carolina      0.033
## Idaho               0.031
## Ohio                0.031
## Maine               0.028
## Iowa                0.026
## 
## $periods
##      estimate 1
## 1988      0.427
## 1986      0.366
## 1987      0.206
## 
## $dimensions
##           N1           N0 N0.effective           T1           T0 T0.effective 
##        1.000       38.000       16.388       12.000       19.000        2.783

Compute Confidence Interval

# Look up the help page for the `vcov` method of `synthdid_estimate` objects
?synthdid:::vcov.synthdid_estimate()

# Standard error from the "placebo" variance estimate
# note: "placebo" is the only option that works for only one treated unit
se <- vcov(tau.hat, method = "placebo") %>% sqrt()

# 95% normal-approximation interval: point estimate -/+ 1.96 * SE
ci <- as.numeric(tau.hat) + c(lower = -1.96, upper = 1.96) * as.numeric(se)

# Report the point estimate, SE and interval bounds to 3 decimals
round(c(ATT = as.numeric(tau.hat), SE = se, ci), digits = 3)

##     ATT      SE   lower   upper 
## -15.604   8.368 -32.005   0.797

3. Diagnostic Plots

# Look up the help pages for the plot method of `synthdid_estimate` objects
# and for `synthdid_plot()`
?synthdid:::plot.synthdid_estimate()
?synthdid::synthdid_plot()

# Build the plot for the fitted estimate, using placebo standard errors
p1 <- plot(x = tau.hat, se.method = "placebo")

# Show it with a title added
p1 + labs(title = "treatment effect")

The control unit contribution plot.

# Build the control-unit contribution plot, using placebo standard errors
p2 <- synthdid_units_plot(
  tau.hat,
  se.method = "placebo"
)

# Show it with an explanatory caption added
p2 + labs(caption = "observe weights for each control states")

Checking for pre-treatment parallel trends:

When the synthetic control and treated trajectories are far from one another, it can be hard to see how parallel they are. Pass overlay=1 to plot to overlay them.

# Overlay the synthetic-control and treated trajectories (overlay = 1)
plot(
  tau.hat,
  se.method = "placebo",
  overlay = 1
)

4. Compare to Other Estimators

# Fit the two comparison estimators on the same panel matrices
tau.did <- did_estimate(Y = setup$Y, N0 = setup$N0, T0 = setup$T0) # diff-in-diff
tau.sc <- sc_estimate(Y = setup$Y, N0 = setup$N0, T0 = setup$T0) # synthetic control

# Collect all three fits in one labelled list
estimates <- list(
  "Diff-in-Diff" = tau.did,
  "Synthetic Control" = tau.sc,
  "Synthetic Diff-in-Diff" = tau.hat
)

estimates

## $`Diff-in-Diff`
## synthdid: -27.349 +- NA. Effective N0/N0 = 38.0/38~1.0. Effective T0/T0 = 19.0/19~1.0. N1,T1 = 1,12. 
## 
## $`Synthetic Control`
## synthdid: -19.620 +- NA. Effective N0/N0 = 3.8/38~0.1. Effective T0/T0 = Inf/19~Inf. N1,T1 = 1,12. 
## 
## $`Synthetic Diff-in-Diff`
## synthdid: -15.604 +- NA. Effective N0/N0 = 16.4/38~0.4. Effective T0/T0 = 2.8/19~0.1. N1,T1 = 1,12.

# Print the point estimates as a plain named numeric vector
print(vapply(estimates, as.numeric, numeric(1)))

##           Diff-in-Diff      Synthetic Control Synthetic Diff-in-Diff 
##              -27.34911              -19.61966              -15.60383

# Compare control-unit contributions across the estimators in `estimates`
synthdid_units_plot(
  estimates,
  se.method = "placebo"
)

Synthetic Difference in Differences – synthdid R package