The example data is based on the publication article:
Cisneros, E., Zhou, S. L., and Börner, J. (2015): Naming and Shaming for Conservation: Evidence from the Brazilian Amazon, PLoS ONE, 10(9), 1 - 24, https://doi.org/10.1371/journal.pone.0136402
The paper analyses the the effect of the blacklisting policy in the Brazilian Amazon on deforestation rates. Here we do not replicate the results of the paper, but produce a minimal dataset to demonstrate estimate the effect of the policy and predict the potential deforestation outcomes under two scenarios:
a) What whould have happend without the policy, i.e. how much more deforestation would have occured.
b) How much more deforestaion would ahve occured if the effect of the policy was 1.64 standard deviations below the average effect size.
library(dplyr)
library(lfe)
#download data
t<-tempfile()
tt<-tempdir()
download.file("https://doi.org/10.1371/journal.pone.0136402.s001",destfile=t)
unzip(zipfile=t,files="blacklist_data_20150623.csv",exdir=tt)
#read
d<-read.csv(file.path(tt,"blacklist_data_20150623.csv"))
#new variables
d$id<-d$municip_geocode_id
d$treat<-I(d$emb_cont_share)*1
#minimal data set
vlist<-c(
"id","year","def_area","treat","gdppc_l1","nibamafines_no_l1"
)
d<-d[,vlist]
#logarithmic transformations (inv. hyp. sine)
for (v in c("def_area","treat","gdppc_l1","nibamafines_no_l1")){
d[,paste0("asinh.",v)]<-asinh(d[,v])
}
#create first differences
a<-lapply(
split(d,d$id),
function(s){
# s<-split(d,d$id)[[1]]
n<-c("def_area","treat","gdppc_l1","nibamafines_no_l1")
n<-c(n,paste0("asinh.",n))
for (v in n){
# print(v)
s[,paste0("d.",v)]<-s[,v]-dplyr::lag(s[,v])
}
return(s)
}
)
d<-do.call("rbind",a)
#ordering is crucial
d<-d[order(d$id,d$year),]
#save
d.base<-d
# names(d)
The panel data set comprises 492 orginally forested municipalities (id) between the years 2002 and 2012. The treatment indicator, treat, turns one when a municipality was set on a deforestation blacklist. def_area is the yearly measure of deforestation at the muncipality level in square meters. gdppc_l1 denotes a the one-year lagged per capita GDP and nibamafines_no_l1 are the lagged number of fines issued by the environmental enforcement agency IBAMA.
summary(d[,c("id","def_area","treat","gdppc_l1","nibamafines_no_l1")])
## id def_area treat gdppc_l1
## Min. :1100015 Min. :0.000e+00 Min. :0.00000 Min. : 1314
## 1st Qu.:1303945 1st Qu.:1.567e+06 1st Qu.:0.00000 1st Qu.: 3935
## Median :1506252 Median :6.180e+06 Median :0.00000 Median : 6448
## Mean :2158849 Mean :2.830e+07 Mean :0.03575 Mean : 9318
## 3rd Qu.:2105456 3rd Qu.:2.504e+07 3rd Qu.:0.00000 3rd Qu.: 11273
## Max. :5108956 Max. :1.308e+09 Max. :1.00000 Max. :158973
## nibamafines_no_l1
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 5.00
## Mean : 17.63
## 3rd Qu.: 16.00
## Max. :2218.00
f<-"asinh.def_area ~ treat + asinh.gdppc_l1 + asinh.nibamafines_no_l1 | id + year | 0 | id"
f<-as.formula(f)
e<-felm(f,d)
summary(e)
##
## Call:
## felm(formula = f, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.6555 -0.5339 0.0512 0.5920 15.4193
##
## Coefficients:
## Estimate Cluster s.e. t value Pr(>|t|)
## treat -0.51441 0.16799 -3.062 0.00221 **
## asinh.gdppc_l1 0.40414 0.17873 2.261 0.02379 *
## asinh.nibamafines_no_l1 -0.01168 0.03195 -0.366 0.71472
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.041 on 4907 degrees of freedom
## Multiple R-squared(full model): 0.687 Adjusted R-squared: 0.6549
## Multiple R-squared(proj model): 0.002256 Adjusted R-squared: -0.1002
## F-statistic(full model, *iid*):21.37 on 504 and 4907 DF, p-value: < 2.2e-16
## F-statistic(proj model): 4.95 on 3 and 491 DF, p-value: 0.002147
e.base<-e
d<-d.base
e<-e.base
fe<-getfe(e)
table(fe$fe) # two types of fixed effects
##
## id year
## 492 11
# split up fixed effects in two objects
fe.year<-fe[fe$fe=="year",]
fe.id<-fe[fe$fe=="id",]
names(fe.year)[1]<-"fe.year"
names(fe.id)[1]<-"fe.id"
# add FE effects ot data frame
d<-merge(d,fe.id[,c("idx","fe.id")] ,by.x="id",by.y="idx",all.x=T)
d<-merge(d,fe.year[,c("idx","fe.year")],by.x="year",by.y="idx",all.x=T)
d<-d[order(d$id,d$year),]
d.fe<-d
d<-d.fe
e<-e.base
d$p1 <- c(e$fitted.values)
d.fit<-d
x<-d[,c("treat","asinh.gdppc_l1","asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"] +d[,"fe.id"] )
table(round(d$p1-d$p2,8))
##
## 0
## 5412
d<-d.fit
e<-e.base
# set treatment to zero
d$treat<-0
x<-d[,c("treat","asinh.gdppc_l1","asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"] +d[,"fe.id"] )
#impact in level
(sum(sinh(d$p1))-sum(sinh(d$p2)))/1000^2 # 7902 sqkm of saved forest
## [1] -7902.038
d<-d.fit
e<-e.base
# reduce effect size by 1.67 to the lower bound of the 90% conf. interval
est<-coefficients(e)
est["treat"]<-est["treat"] + 1.645 * sqrt(diag(vcov(e)))["treat"]
x<-d[,c("treat","asinh.gdppc_l1","asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% est + d[,"fe.year"] +d[,"fe.id"] )
#impact in level
(sum(sinh(d$p1))-sum(sinh(d$p2)))/1000^2 # 3813 sqkm of saved forest
## [1] -3813.088
Note: Predictions after FE estimations necessarly need fixed effect values per unit - here: d[,"fe.id"]. That means, that it is not possible to predict out of sample into new observations. One would have to assume sume unit-FE (or try to predict such unit-FE) or switch to a first difference estimation, which only uses year-fixed-effects.
f<-"d.asinh.def_area ~ d.treat + d.asinh.gdppc_l1 + d.asinh.nibamafines_no_l1 | year | 0 | id"
f<-as.formula(f)
e<-felm(f,d)
summary(e)
##
## Call:
## felm(formula = f, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.1858 -0.4537 0.0817 0.5228 19.4110
##
## Coefficients:
## Estimate Cluster s.e. t value Pr(>|t|)
## d.treat -0.86766 0.19375 -4.478 7.7e-06 ***
## d.asinh.gdppc_l1 -0.01062 0.13746 -0.077 0.938
## d.asinh.nibamafines_no_l1 0.03621 0.02182 1.660 0.097 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.947 on 4907 degrees of freedom
## (492 observations deleted due to missingness)
## Multiple R-squared(full model): 0.05821 Adjusted R-squared: 0.05591
## Multiple R-squared(proj model): 0.001532 Adjusted R-squared: -0.0009098
## F-statistic(full model, *iid*):25.28 on 12 and 4907 DF, p-value: < 2.2e-16
## F-statistic(proj model): 7.567 on 3 and 491 DF, p-value: 5.886e-05
e.base<-e
d<-d.base
e<-e.base
fe<-getfe(e)
table(fe$fe) # here only year fixed effects
##
## year
## 10
fe.year<-fe[fe$fe=="year",]
names(fe.year)[1]<-"fe.year"
# add FE effects ot data frame
d<-merge(d,fe.year[,c("idx","fe.year")],by.x="year",by.y="idx",all.x=T)
d<-d[order(d$id,d$year),]
d.fe<-d
d<-d.fe
e<-e.base
z<-data.frame(
id = e$clustervar$id,
year = e$fe$year,
p1 = c(e$fitted.values)
)
d<-merge(d,z,by=c("id","year"),all.x=T)
# d[d$id==1100015,]
# prediction in level
a<-lapply(
split(d,d$id),
function(s){
# s<-split(d,d$id)[[1]]
s[2,"x"]<-s[1,"asinh.def_area"]+s[2,"p1"]
for (i in 3:nrow(s)){
s[i,"x"]<-s[(i-1),"x"]+s[i,"p1"]
}
s$p1l<-sinh(s$x)
s$x<-NULL
return(s)
}
)
d<-do.call("rbind",a)
d.fit<-d
d<-d.fit
e<-e.base
x<-d[,c("d.treat","d.asinh.gdppc_l1","d.asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"] )
table(round(d$p1-d$p2,12),useNA="always")
##
## 0 <NA>
## 4920 492
d<-d.fit
e<-e.base
# set treatment to zero
d$treat<-0
d$d.treat<-0
x<-d[,c("d.treat","d.asinh.gdppc_l1","d.asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"] )
# prediction in level
a<-lapply(
split(d,d$id),
function(s){
# s<-split(d,d$id)[[1]]
s[2,"x"]<-s[1,"asinh.def_area"]+s[2,"p2"]
for (i in 3:nrow(s)){
s[i,"x"]<-s[(i-1),"x"]+s[i,"p2"]
}
s$p2l<-sinh(s$x)
s$x<-NULL
return(s)
}
)
d<-do.call("rbind",a)
#impact in level
(sum(d$p1l,na.rm=T)-sum(d$p2l,na.rm=T))/1000^2
## [1] -9512.9
d<-d.fit
e<-e.base
# reduce effect size by 1.67 to the lower bound of the 90% conf. interval
est<-coefficients(e)
est["d.treat"]<-est["d.treat"] + 1.645 * sqrt(diag(vcov(e)))["d.treat"]
x<-d[,c("d.treat","d.asinh.gdppc_l1","d.asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% est + d[,"fe.year"] )
# prediction in level
a<-lapply(
split(d,d$id),
function(s){
# s<-split(d,d$id)[[1]]
s[2,"x"]<-s[1,"asinh.def_area"]+s[2,"p2"]
for (i in 3:nrow(s)){
s[i,"x"]<-s[(i-1),"x"]+s[i,"p2"]
}
s$p2l<-sinh(s$x)
s$x<-NULL
return(s)
}
)
d<-do.call("rbind",a)
#impact in level
(sum(d$p1l,na.rm=T)-sum(d$p2l,na.rm=T))/1000^2
## [1] -2718.87
Note: Predictions after FD estimations only need year fixed effects. That means, that it is possible to predict out of sample into new observations.