Exploratory Data Analysis V - Reproducing German Reunification with Generalized Synthetic Control Method (GSCM)

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(loo)

## This is loo version 2.5.1

## - Online documentation and vignettes at mc-stan.org/loo

## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.

library(foreign)
library(Synth)

## ##
## ## Synth Package: Implements Synthetic Control Methods.

## ## See https://web.stanford.edu/~jhain/synthpage.html for additional information.

library(ggplot2)

Importing the Dataset [1]

# Load the replication data set (Stata .dta file) into a data frame and
# display it.
repgermany_path <- "/Users/apple/Desktop/repgermany.dta"
d <- read.dta(file = repgermany_path)
d

Prepare Data for Training Model [2]

# Build the Synth input matrices for the training (cross-validation) model.
# Predictors are averaged over 1971-1980 and the outcome fit is evaluated on
# 1981-1990; unit 7 is the treated unit.
dataprep_init <-
  dataprep(
    foo = d,
    predictors = c("gdp", "trade", "infrate"),
    dependent = "gdp",
    unit.variable = 1,
    time.variable = 3,
    special.predictors = list(
      list("industry", 1971:1980, c("mean")),
      list("schooling", c(1970, 1975), c("mean")),
      list("invest70", 1980, c("mean"))
    ),
    treatment.identifier = 7,
    # Drop the treated unit by its identifier value rather than by its
    # position in unique(d$index), so the donor pool stays correct even if
    # the unit ids are not stored in the order 1, 2, 3, ...
    controls.identifier = base::setdiff(unique(d$index), 7),
    time.predictors.prior = 1971:1980,
    time.optimize.ssr = 1981:1990,
    unit.names.variable = 2,
    time.plot = 1960:2003
  )

Fit Training Model by Implementing GSCM [3]

# Estimate the training model: synth() searches for the predictor weights (v)
# and the donor weights (w) from the matrices stored in dataprep_init.
# The *.ipop arguments are optimizer settings (see ?synth).
synth_init <- synth(
  data.prep.obj = dataprep_init,
  Margin.ipop = 0.005,
  Sigf.ipop = 7,
  Bound.ipop = 6
)

## 
## X1, X0, Z1, Z0 all come directly from dataprep object.
## 
## 
## **************** 
##  searching for synthetic control unit  
##  
## 
## **************** 
## **************** 
## **************** 
## 
## MSPE (LOSS V): 4580.3 
## 
## solution.v:
##  0.4416298 0.1341838 0.07154154 0.001468671 0.1066738 0.2445024 
## 
## solution.w:
##  0.1350214 6.08e-08 0.5072787 5.2e-08 4.151e-07 6.59e-08 4.59e-08 2.401e-07 6.34e-08 0.165922 0.1464777 3.5e-08 2.76e-08 3.25e-08 0.0452973 1.8933e-06

Prepare Data for Main Model

# Build the Synth input matrices for the main model.
# Predictors are averaged over 1981-1990 and the outcome fit is evaluated on
# 1960-1989; unit 7 is the treated unit.
dataprep_main <-
  dataprep(
    foo = d,
    predictors = c("gdp", "trade", "infrate"),
    dependent = "gdp",
    unit.variable = 1,
    time.variable = 3,
    special.predictors = list(
      list("industry", 1981:1990, c("mean")),
      list("schooling", c(1980, 1985), c("mean")),
      list("invest80", 1980, c("mean"))
    ),
    treatment.identifier = 7,
    # Drop the treated unit by its identifier value rather than by its
    # position in unique(d$index), so the donor pool stays correct even if
    # the unit ids are not stored in the order 1, 2, 3, ...
    controls.identifier = base::setdiff(unique(d$index), 7),
    time.predictors.prior = 1981:1990,
    time.optimize.ssr = 1960:1989,
    unit.names.variable = 2,
    time.plot = 1960:2003
  )

## 
##  Missing data: treated unit; special predictor: special.industry.1981.1990 ; for period: 1990 
##  We ignore (na.rm = TRUE) all missing values for predictors.op.

Fit Main Model with v from Training Model (By Cross-Validation)

# Re-use the predictor weights (v) found by the training model, so that only
# the donor weights (w) are optimized for the main model.
v_from_training <- as.numeric(synth_init$solution.v)
synth_main <- synth(
  data.prep.obj = dataprep_main,
  custom.v = v_from_training
)

## 
## X1, X0, Z1, Z0 all come directly from dataprep object.
## 
## 
## **************** 
##  optimization over w weights: computing synthtic control unit 
##  
## 
## 
## **************** 
##  v weights supplied manually: computing synthtic control unit 
##  
## 
## 
## **************** 
## **************** 
## **************** 
## 
## MSPE (LOSS V): 14313.62 
## 
## solution.v:
##  0.4416298 0.1341838 0.07154154 0.001468671 0.1066738 0.2445024 
## 
## solution.w:
##  0.2186942 0.0009650793 0.4175261 0.001169395 0.001050788 0.0008147026 0.0005029538 0.09028553 0.0005151888 0.1113089 0.1550799 0.0002918951 0.000302974 0.0005901991 0.0004689807 0.0004332223

# Collect the summary tables of the main model (predictor balance, v weights,
# w weights and losses) and display them.
synth_df <- synth.tab(
  synth.res = synth_main,
  dataprep.res = dataprep_main
)
synth_df

## $tab.pred
##                               Treated Synthetic Sample Mean
## gdp                         15808.900 15802.240   13669.381
## trade                          56.778    56.939      59.831
## infrate                         2.595     3.495       7.617
## special.industry.1981.1990     34.538    34.387      33.794
## special.schooling.1980.1985    55.500    55.180      38.659
## special.invest80.1980          27.018    27.034      25.895
## 
## $tab.v
##                             v.weights
## gdp                             0.442
## trade                           0.134
## infrate                         0.072
## special.industry.1981.1990      0.001
## special.schooling.1980.1985     0.107
## special.invest80.1980           0.245
## 
## $tab.w
##    w.weights  unit.names unit.numbers
## 1      0.219         USA            1
## 2      0.001          UK            2
## 3      0.418     Austria            3
## 4      0.001     Belgium            4
## 5      0.001     Denmark            5
## 6      0.001      France            6
## 8      0.001       Italy            8
## 9      0.090 Netherlands            9
## 10     0.001      Norway           10
## 12     0.111 Switzerland           12
## 14     0.155       Japan           14
## 16     0.000      Greece           16
## 18     0.000    Portugal           18
## 19     0.001       Spain           19
## 20     0.000   Australia           20
## 21     0.000 New Zealand           21
## 
## $tab.loss
##           Loss W   Loss V
## [1,] 0.002503625 14313.62

Computation of Synthetic West Germany’s GDP Per Capita by Matrix Multiplication

# Outcome series of the donor units over the plotting window. The element is
# named in full: `$Y0` only resolved through partial matching of `Y0plot`.
dataprep_gdp <- dataprep_main$Y0plot

# Donor weights estimated by the main model.
synth_weight <- synth_main$solution.w

# Weighted combination of the donor outcomes: the synthetic unit's per-capita
# GDP, one row per year.
synth_gdp <- dataprep_gdp %*% synth_weight

Forming Tabular Data for West Germany’s GDP Per Capita (Real and Synthetic)

# Observed series: keep the treated unit (index 7) and its per-capita GDP by
# year, with sequential row names sized from the data instead of a fixed 1:44.
d_germany <- filter(d, index == 7)
d_germany <- d_germany[, c("year", "gdp")]
rownames(d_germany) <- seq_len(nrow(d_germany))
d_germany <- rename(d_germany, GDP_Real_West_Germany = gdp)

# Synthetic series: synth_gdp is a one-column matrix whose row names are the
# years. Build the data frame directly so the code does not depend on the
# auto-generated column name of the matrix product.
d_synth <- data.frame(
  GDP_Synthetic_West_Germany = as.numeric(synth_gdp),
  year = as.integer(rownames(synth_gdp))
)

# One row per year with the real and the synthetic GDP side by side.
d_synth_real <- full_join(d_germany, d_synth, by = "year")

d_synth_real

Visualization I

# Real vs. synthetic per-capita GDP over time. Colours are given as literal
# colour names inside aes() and resolved by scale_color_identity(), which
# also builds the legend. Line widths use `linewidth`; `size` for lines is
# deprecated since ggplot2 3.4.0.
ggplot(d_synth_real) +
  geom_line(aes(x = year, y = GDP_Real_West_Germany, color = "red2"),
            linewidth = 1.2) +
  geom_line(aes(x = year, y = GDP_Synthetic_West_Germany, color = "darkblue"),
            linetype = "longdash", linewidth = 1.2) +
  # Vertical marker at the reunification year.
  geom_vline(xintercept = 1990, linetype = "dotted", color = "darkgrey",
             linewidth = 2) +
  xlab("year") + ylab("Per Capita GDP (PPP, 2002 USD)") +
  scale_color_identity(name = "Per Capita GDP", breaks = c("red2", "darkblue"),
                       labels = c("Real West Germany", "Synthetic West Germany"),
                       guide = "legend") +
  theme(legend.position = c(0.85, 0.15)) +
  annotate("text", x = 1984, y = 30000, label = "German Reunification") +
  ggtitle("Trends in Per Capita GDP: Real West Germany and West Germany under GSCM")

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Adding the Per Capita GDP Difference between Real and Synthetic West Germany to the Tabular Data

# Gap between the observed and the synthetic per-capita GDP, one value per
# year (real minus synthetic).
diff <- with(
  d_synth_real,
  GDP_Real_West_Germany - GDP_Synthetic_West_Germany
)

# Store the gap as the `diff` column of the combined table and display it.
d_synth_real[["diff"]] <- diff

d_synth_real

Visualization II

# Gap plot: real minus synthetic per-capita GDP by year, with dashed guides
# at the reunification year (x = 1990) and at a zero gap (y = 0).
# Line widths use `linewidth`; `size` for lines is deprecated since
# ggplot2 3.4.0.
ggplot(d_synth_real) +
  geom_line(aes(x = year, y = diff), color = "orange1",
            linetype = "solid", linewidth = 1.1) +
  geom_vline(xintercept = 1990, linetype = "dashed", linewidth = 0.5,
             color = "darkgrey") +
  geom_hline(yintercept = 0, linetype = "dashed", linewidth = 0.5,
             color = "darkgrey") +
  annotate("text", x = 1984, y = -700, label = "German Reunification") +
  ylab("difference in per-capita GDP (PPP, 2002 USD)") +
  ggtitle("Per Capita GDP Difference between West Germany and West Germany under GSCM")

References

[1] Hainmueller, Jens, (2014), “Replication data for: Comparative Politics and the Synthetic Control Method”, https://doi.org/10.7910/DVN/24714, Harvard Dataverse, V2, UNF:5:AtEF45hDnFLetMIiv9tjpQ== [fileUNF]

[2] Hainmueller, Jens, (2014), “rep.r”, Replication data for: Comparative Politics and the Synthetic Control Method, https://doi.org/10.7910/DVN/24714/FX8ULY, Harvard Dataverse, V2

[3] Abadie, A., Diamond, A., & Hainmueller, J. (2011). Synth: An R Package for Synthetic Control Methods in Comparative Case Studies. Journal of Statistical Software, 42(13), 1–17. https://doi.org/10.18637/jss.v042.i13

Exploratory Data Analysis V - Reproducing German Reunification with Generalized Synthetic Control Method (GSCM)

Zhongming Jiang

2023-06-07