library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(loo)
## This is loo version 2.5.1
## - Online documentation and vignettes at mc-stan.org/loo
## - As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session.
library(foreign)
library(Synth)
## ##
## ## Synth Package: Implements Synthetic Control Methods.
## ## See https://web.stanford.edu/~jhain/synthpage.html for additional information.
library(ggplot2)
Importing the Dataset [1]
# Load the replication dataset (Stata .dta file) into `d` [1].
# The location can be overridden through the REPGERMANY_DTA environment
# variable so the script is not tied to one user's desktop; when the variable
# is unset the original path is used, so existing behaviour is unchanged.
data_path <- Sys.getenv(
  "REPGERMANY_DTA",
  unset = "/Users/apple/Desktop/repgermany.dta"
)
if (!file.exists(data_path)) {
  stop("Dataset not found: ", data_path, call. = FALSE)
}
d <- read.dta(data_path)
d
Prepare Data for Training Model [2]
# Build the Synth input matrices for the training model [2].
# Predictors are averaged over 1971-1980 and the outcome fit is optimised
# over 1981-1990.
# Columns are addressed by position (1 = unit id, 2 = unit name, 3 = time);
# NOTE(review): this presumes the column order of the replication file --
# confirm against names(d).
dataprep_init <- dataprep(
  foo = d,
  predictors = c("gdp", "trade", "infrate"),
  dependent = "gdp",
  unit.variable = 1,
  time.variable = 3,
  # Each special predictor is (variable, periods, aggregation operator).
  special.predictors = list(
    list("industry", 1971:1980, c("mean")),
    list("schooling", c(1970, 1975), c("mean")),
    list("invest70", 1980, c("mean"))
  ),
  treatment.identifier = 7,
  # Exclude the treated unit by its id value, not by its position in the
  # vector: `unique(d$index)[-7]` was only correct because id 7 happens to be
  # the 7th distinct id in the data.
  controls.identifier = setdiff(unique(d$index), 7),
  time.predictors.prior = 1971:1980,
  time.optimize.ssr = 1981:1990,
  unit.names.variable = 2,
  time.plot = 1960:2003
)
Fit Training Model by Implementing GSCM [3]
# Fit the training model: synth() searches for the predictor weights
# (solution.v) and donor weights (solution.w) from the matrices prepared in
# `dataprep_init`. The *.ipop arguments are tuning settings forwarded to the
# ipop quadratic-programming routine.
synth_init <- synth(
  data.prep.obj = dataprep_init,
  Bound.ipop = 6,
  Sigf.ipop = 7,
  Margin.ipop = 0.005
)
##
## X1, X0, Z1, Z0 all come directly from dataprep object.
##
##
## ****************
## searching for synthetic control unit
##
##
## ****************
## ****************
## ****************
##
## MSPE (LOSS V): 4580.3
##
## solution.v:
## 0.4416298 0.1341838 0.07154154 0.001468671 0.1066738 0.2445024
##
## solution.w:
## 0.1350214 6.08e-08 0.5072787 5.2e-08 4.151e-07 6.59e-08 4.59e-08 2.401e-07 6.34e-08 0.165922 0.1464777 3.5e-08 2.76e-08 3.25e-08 0.0452973 1.8933e-06
Prepare Data for Main Model
# Build the Synth input matrices for the main model.
# Predictors are averaged over 1981-1990 and the outcome fit is optimised
# over 1960-1989.
# Columns are addressed by position (1 = unit id, 2 = unit name, 3 = time);
# NOTE(review): this presumes the column order of the replication file --
# confirm against names(d).
dataprep_main <- dataprep(
  foo = d,
  predictors = c("gdp", "trade", "infrate"),
  dependent = "gdp",
  unit.variable = 1,
  time.variable = 3,
  # Each special predictor is (variable, periods, aggregation operator).
  special.predictors = list(
    list("industry", 1981:1990, c("mean")),
    list("schooling", c(1980, 1985), c("mean")),
    list("invest80", 1980, c("mean"))
  ),
  treatment.identifier = 7,
  # Exclude the treated unit by its id value, not by its position in the
  # vector: `unique(d$index)[-7]` was only correct because id 7 happens to be
  # the 7th distinct id in the data.
  controls.identifier = setdiff(unique(d$index), 7),
  time.predictors.prior = 1981:1990,
  time.optimize.ssr = 1960:1989,
  unit.names.variable = 2,
  time.plot = 1960:2003
)
##
## Missing data: treated unit; special predictor: special.industry.1981.1990 ; for period: 1990
## We ignore (na.rm = TRUE) all missing values for predictors.op.
Fit Main Model with v from Training Model (By Cross-Validation)
# Fit the main model. The predictor weights are not re-estimated: the
# solution.v found by the training model is passed in as a plain numeric
# vector through `custom.v`, so only the donor weights are optimised here.
synth_main <- synth(
  custom.v = as.numeric(synth_init[["solution.v"]]),
  data.prep.obj = dataprep_main
)
##
## X1, X0, Z1, Z0 all come directly from dataprep object.
##
##
## ****************
## optimization over w weights: computing synthtic control unit
##
##
##
## ****************
## v weights supplied manually: computing synthtic control unit
##
##
##
## ****************
## ****************
## ****************
##
## MSPE (LOSS V): 14313.62
##
## solution.v:
## 0.4416298 0.1341838 0.07154154 0.001468671 0.1066738 0.2445024
##
## solution.w:
## 0.2186942 0.0009650793 0.4175261 0.001169395 0.001050788 0.0008147026 0.0005029538 0.09028553 0.0005151888 0.1113089 0.1550799 0.0002918951 0.000302974 0.0005901991 0.0004689807 0.0004332223
# Summarise the main model with synth.tab() and print the result; wrapping
# the assignment in parentheses prints the stored object.
(synth_df <- synth.tab(
  synth.res = synth_main,
  dataprep.res = dataprep_main
))
## $tab.pred
## Treated Synthetic Sample Mean
## gdp 15808.900 15802.240 13669.381
## trade 56.778 56.939 59.831
## infrate 2.595 3.495 7.617
## special.industry.1981.1990 34.538 34.387 33.794
## special.schooling.1980.1985 55.500 55.180 38.659
## special.invest80.1980 27.018 27.034 25.895
##
## $tab.v
## v.weights
## gdp 0.442
## trade 0.134
## infrate 0.072
## special.industry.1981.1990 0.001
## special.schooling.1980.1985 0.107
## special.invest80.1980 0.245
##
## $tab.w
## w.weights unit.names unit.numbers
## 1 0.219 USA 1
## 2 0.001 UK 2
## 3 0.418 Austria 3
## 4 0.001 Belgium 4
## 5 0.001 Denmark 5
## 6 0.001 France 6
## 8 0.001 Italy 8
## 9 0.090 Netherlands 9
## 10 0.001 Norway 10
## 12 0.111 Switzerland 12
## 14 0.155 Japan 14
## 16 0.000 Greece 16
## 18 0.000 Portugal 18
## 19 0.001 Spain 19
## 20 0.000 Australia 20
## 21 0.000 New Zealand 21
##
## $tab.loss
## Loss W Loss V
## [1,] 0.002503625 14313.62
Computation of Synthetic West Germany’s GDP Per Capita by Matrix Multiplication
# Synthetic West Germany's GDP path = (control-unit outcome matrix) %*%
# (donor weights).
# dataprep() stores the control outcomes over `time.plot` as `Y0plot`; the
# earlier `$Y0` only worked through `$` partial matching on the list, which
# would silently break if another element starting with "Y0" were added.
dataprep_gdp <- dataprep_main$Y0plot
synth_weight <- synth_main$solution.w
# One column (named after the weight vector), one row per plotted year.
synth_gdp <- dataprep_gdp %*% synth_weight
Forming Tabular Data for West Germany’s GDP Per Capita (Real and Synthetic)
# Real series: keep West Germany (unit id 7) and its year/GDP columns.
d_germany <- filter(d, index == 7)
d_germany <- d_germany[, c("year", "gdp")]
# Reset row names to 1..n without hard-coding the number of years.
rownames(d_germany) <- NULL
d_germany <- rename(d_germany, GDP_Real_West_Germany = gdp)

# Synthetic series: the years are carried as the row names of `synth_gdp`.
# as.numeric() drops the matrix dimnames so the data frame gets default
# 1..n row names, and the column is named directly instead of relying on the
# incidental "w.weight" column name of the matrix product.
d_synth <- data.frame(
  GDP_Synthetic_West_Germany = as.numeric(synth_gdp),
  year = as.integer(rownames(synth_gdp))
)

# Combine both series by year into one table and print it.
d_synth_real <- full_join(d_germany, d_synth, by = "year")
d_synth_real
Visualization I
# Plot real vs. synthetic per-capita GDP with a marker at 1990.
# Line thickness is set with `linewidth`: using `size` for lines is
# deprecated as of ggplot2 3.4.0 and triggers a warning.
ggplot(d_synth_real) +
  # The colour names are mapped inside aes() and resolved literally by
  # scale_color_identity(), which also builds the legend.
  geom_line(
    aes(x = year, y = GDP_Real_West_Germany, color = "red2"),
    linewidth = 1.2
  ) +
  geom_line(
    aes(x = year, y = GDP_Synthetic_West_Germany, color = "darkblue"),
    linetype = "longdash", linewidth = 1.2
  ) +
  geom_vline(
    xintercept = 1990, linetype = "dotted", color = "darkgrey", linewidth = 2
  ) +
  xlab("year") +
  ylab("Per Capita GDP (PPP, 2002 USD)") +
  scale_color_identity(
    name = "Per Capita GDP",
    breaks = c("red2", "darkblue"),
    labels = c("Real West Germany", "Synthetic West Germany"),
    guide = "legend"
  ) +
  theme(legend.position = c(0.85, 0.15)) +
  annotate("text", x = 1984, y = 30000, label = "German Reunification") +
  ggtitle(
    "Trends in Per Capita GDP: Real West Germany and West Germany under GSCM"
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
Adding the Difference in GDP Per Capita between Real and Synthetic West Germany to the Tabular Data
# Add the gap (real minus synthetic per-capita GDP) as column `diff`.
# It is computed inside mutate() so no global object named `diff` is left
# behind to shadow base::diff().
d_synth_real <- mutate(
  d_synth_real,
  diff = GDP_Real_West_Germany - GDP_Synthetic_West_Germany
)
d_synth_real
Visualization II
# Plot the real-minus-synthetic GDP gap over time, with dashed reference
# lines at 1990 and at a zero gap.
# `linewidth` replaces `size` for lines (deprecated as of ggplot2 3.4.0), and
# the abbreviated `col` argument is spelled out as `color`.
ggplot(d_synth_real) +
  geom_line(
    aes(x = year, y = diff),
    color = "orange1", linetype = "solid", linewidth = 1.1
  ) +
  geom_vline(
    xintercept = 1990, linetype = "dashed", linewidth = 0.5,
    color = "darkgrey"
  ) +
  geom_hline(
    yintercept = 0, linetype = "dashed", linewidth = 0.5,
    color = "darkgrey"
  ) +
  annotate("text", x = 1984, y = -700, label = "German Reunification") +
  ylab("difference in per-capita GDP (PPP, 2002 USD)") +
  ggtitle(
    "Per Capita GDP Difference between West Germany and West Germany under GSCM"
  )
References
[1] Hainmueller, Jens, (2014), “Replication data for: Comparative Politics and the Synthetic Control Method”, https://doi.org/10.7910/DVN/24714, Harvard Dataverse, V2, UNF:5:AtEF45hDnFLetMIiv9tjpQ== [fileUNF]
[2] Hainmueller, Jens, (2014), “rep.r”, Replication data for: Comparative Politics and the Synthetic Control Method, https://doi.org/10.7910/DVN/24714/FX8ULY, Harvard Dataverse, V2
[3] Abadie, A., Diamond, A., & Hainmueller, J. (2011). Synth: An R Package for Synthetic Control Methods in Comparative Case Studies. Journal of Statistical Software, 42(13), 1–17. https://doi.org/10.18637/jss.v042.i13