Data download

The example data is based on the publication article:

Cisneros, E., Zhou, S. L., and Börner, J. (2015): Naming and Shaming for Conservation: Evidence from the Brazilian Amazon, PLoS ONE, 10(9), 1 - 24, https://doi.org/10.1371/journal.pone.0136402

The paper analyses the the effect of the blacklisting policy in the Brazilian Amazon on deforestation rates. Here we do not replicate the results of the paper, but produce a minimal dataset to demonstrate estimate the effect of the policy and predict the potential deforestation outcomes under two scenarios:

a) What whould have happend without the policy, i.e. how much more deforestation would have occured. 

b) How much more deforestaion would ahve occured if the effect of the policy was 1.64 standard deviations below the average effect size.
library(dplyr)
library(lfe)

#download data
t<-tempfile()
tt<-tempdir()
download.file("https://doi.org/10.1371/journal.pone.0136402.s001",destfile=t)
unzip(zipfile=t,files="blacklist_data_20150623.csv",exdir=tt)
#read
d<-read.csv(file.path(tt,"blacklist_data_20150623.csv"))
#new variables
d$id<-d$municip_geocode_id
d$treat<-I(d$emb_cont_share)*1
#minimal data set
vlist<-c(
  "id","year","def_area","treat","gdppc_l1","nibamafines_no_l1"
)
d<-d[,vlist]
#logarithmic transformations (inv. hyp. sine)
for (v in c("def_area","treat","gdppc_l1","nibamafines_no_l1")){
  d[,paste0("asinh.",v)]<-asinh(d[,v])
}
#create first differences
a<-lapply(
  split(d,d$id),
  function(s){
    # s<-split(d,d$id)[[1]]
    n<-c("def_area","treat","gdppc_l1","nibamafines_no_l1")
    n<-c(n,paste0("asinh.",n))
    for (v in n){
      # print(v)
      s[,paste0("d.",v)]<-s[,v]-dplyr::lag(s[,v])
    }
    return(s)
  }
)
d<-do.call("rbind",a)
#ordering is crucial
d<-d[order(d$id,d$year),]
#save
d.base<-d
# names(d)

The panel data set comprises 492 orginally forested municipalities (id) between the years 2002 and 2012. The treatment indicator, treat, turns one when a municipality was set on a deforestation blacklist. def_area is the yearly measure of deforestation at the muncipality level in square meters. gdppc_l1 denotes a the one-year lagged per capita GDP and nibamafines_no_l1 are the lagged number of fines issued by the environmental enforcement agency IBAMA.

summary(d[,c("id","def_area","treat","gdppc_l1","nibamafines_no_l1")])
##        id             def_area             treat            gdppc_l1     
##  Min.   :1100015   Min.   :0.000e+00   Min.   :0.00000   Min.   :  1314  
##  1st Qu.:1303945   1st Qu.:1.567e+06   1st Qu.:0.00000   1st Qu.:  3935  
##  Median :1506252   Median :6.180e+06   Median :0.00000   Median :  6448  
##  Mean   :2158849   Mean   :2.830e+07   Mean   :0.03575   Mean   :  9318  
##  3rd Qu.:2105456   3rd Qu.:2.504e+07   3rd Qu.:0.00000   3rd Qu.: 11273  
##  Max.   :5108956   Max.   :1.308e+09   Max.   :1.00000   Max.   :158973  
##  nibamafines_no_l1
##  Min.   :   0.00  
##  1st Qu.:   0.00  
##  Median :   5.00  
##  Mean   :  17.63  
##  3rd Qu.:  16.00  
##  Max.   :2218.00

Fixed effect (FE) estimation

Estimation

f<-"asinh.def_area ~ treat + asinh.gdppc_l1 + asinh.nibamafines_no_l1 | id + year | 0 | id"
f<-as.formula(f)
e<-felm(f,d)
summary(e)
## 
## Call:
##    felm(formula = f, data = d) 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.6555  -0.5339   0.0512   0.5920  15.4193 
## 
## Coefficients:
##                         Estimate Cluster s.e. t value Pr(>|t|)   
## treat                   -0.51441      0.16799  -3.062  0.00221 **
## asinh.gdppc_l1           0.40414      0.17873   2.261  0.02379 * 
## asinh.nibamafines_no_l1 -0.01168      0.03195  -0.366  0.71472   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.041 on 4907 degrees of freedom
## Multiple R-squared(full model): 0.687   Adjusted R-squared: 0.6549 
## Multiple R-squared(proj model): 0.002256   Adjusted R-squared: -0.1002 
## F-statistic(full model, *iid*):21.37 on 504 and 4907 DF, p-value: < 2.2e-16 
## F-statistic(proj model):  4.95 on 3 and 491 DF, p-value: 0.002147
e.base<-e

Add fixed effects to the data frame

d<-d.base
e<-e.base
fe<-getfe(e)
table(fe$fe) # two types of fixed effects
## 
##   id year 
##  492   11
# split up fixed effects in two objects
fe.year<-fe[fe$fe=="year",]
fe.id<-fe[fe$fe=="id",]
names(fe.year)[1]<-"fe.year"
names(fe.id)[1]<-"fe.id"
# add FE effects ot data frame
d<-merge(d,fe.id[,c("idx","fe.id")]  ,by.x="id",by.y="idx",all.x=T)
d<-merge(d,fe.year[,c("idx","fe.year")],by.x="year",by.y="idx",all.x=T)
d<-d[order(d$id,d$year),]
d.fe<-d

Add fitted values to the data frame

d<-d.fe
e<-e.base
d$p1 <- c(e$fitted.values)
d.fit<-d

Check if prediction works

x<-d[,c("treat","asinh.gdppc_l1","asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"] +d[,"fe.id"]   )
table(round(d$p1-d$p2,8))
## 
##    0 
## 5412

Prediction with manipulated data set

d<-d.fit
e<-e.base
# set treatment to zero
d$treat<-0
x<-d[,c("treat","asinh.gdppc_l1","asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"] +d[,"fe.id"]   )
#impact in level
(sum(sinh(d$p1))-sum(sinh(d$p2)))/1000^2 # 7902 sqkm of saved forest
## [1] -7902.038

Prediction with manipulated estimates

d<-d.fit
e<-e.base
# reduce effect size by 1.67 to the lower bound of the 90% conf. interval
est<-coefficients(e)
est["treat"]<-est["treat"] + 1.645 * sqrt(diag(vcov(e)))["treat"]
x<-d[,c("treat","asinh.gdppc_l1","asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% est + d[,"fe.year"] +d[,"fe.id"]   )
#impact in level
(sum(sinh(d$p1))-sum(sinh(d$p2)))/1000^2 # 3813 sqkm of saved forest
## [1] -3813.088

Out of sample prediction

Note: Predictions after FE estimations necessarly need fixed effect values per unit - here: d[,"fe.id"]. That means, that it is not possible to predict out of sample into new observations. One would have to assume sume unit-FE (or try to predict such unit-FE) or switch to a first difference estimation, which only uses year-fixed-effects.

First-Difference FD estimation

Estimation

f<-"d.asinh.def_area ~ d.treat + d.asinh.gdppc_l1 + d.asinh.nibamafines_no_l1 | year | 0 | id"
f<-as.formula(f)
e<-felm(f,d)
summary(e)
## 
## Call:
##    felm(formula = f, data = d) 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.1858  -0.4537   0.0817   0.5228  19.4110 
## 
## Coefficients:
##                           Estimate Cluster s.e. t value Pr(>|t|)    
## d.treat                   -0.86766      0.19375  -4.478  7.7e-06 ***
## d.asinh.gdppc_l1          -0.01062      0.13746  -0.077    0.938    
## d.asinh.nibamafines_no_l1  0.03621      0.02182   1.660    0.097 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.947 on 4907 degrees of freedom
##   (492 observations deleted due to missingness)
## Multiple R-squared(full model): 0.05821   Adjusted R-squared: 0.05591 
## Multiple R-squared(proj model): 0.001532   Adjusted R-squared: -0.0009098 
## F-statistic(full model, *iid*):25.28 on 12 and 4907 DF, p-value: < 2.2e-16 
## F-statistic(proj model): 7.567 on 3 and 491 DF, p-value: 5.886e-05
e.base<-e

Add fixed effects to the data frame

d<-d.base
e<-e.base
fe<-getfe(e)
table(fe$fe) # here only year fixed effects
## 
## year 
##   10
fe.year<-fe[fe$fe=="year",]
names(fe.year)[1]<-"fe.year"
# add FE effects ot data frame
d<-merge(d,fe.year[,c("idx","fe.year")],by.x="year",by.y="idx",all.x=T)
d<-d[order(d$id,d$year),]
d.fe<-d

Add fitted values to the data frame

d<-d.fe
e<-e.base
z<-data.frame(
  id = e$clustervar$id,
  year = e$fe$year,
  p1 = c(e$fitted.values)
)
d<-merge(d,z,by=c("id","year"),all.x=T)
# d[d$id==1100015,]
# prediction in level 
a<-lapply(
  split(d,d$id),
  function(s){
    # s<-split(d,d$id)[[1]]
      s[2,"x"]<-s[1,"asinh.def_area"]+s[2,"p1"]
    for (i in 3:nrow(s)){
      s[i,"x"]<-s[(i-1),"x"]+s[i,"p1"]
    }
    s$p1l<-sinh(s$x)
    s$x<-NULL
    return(s)
  }
)
d<-do.call("rbind",a)
d.fit<-d

Check if prediction works

d<-d.fit
e<-e.base
x<-d[,c("d.treat","d.asinh.gdppc_l1","d.asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"]   )
table(round(d$p1-d$p2,12),useNA="always")
## 
##    0 <NA> 
## 4920  492

Prediction with manipulated data set

d<-d.fit
e<-e.base
# set treatment to zero
d$treat<-0
d$d.treat<-0
x<-d[,c("d.treat","d.asinh.gdppc_l1","d.asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% e$coefficients + d[,"fe.year"]   )
# prediction in level 
a<-lapply(
  split(d,d$id),
  function(s){
    # s<-split(d,d$id)[[1]]
      s[2,"x"]<-s[1,"asinh.def_area"]+s[2,"p2"]
    for (i in 3:nrow(s)){
      s[i,"x"]<-s[(i-1),"x"]+s[i,"p2"]
    }
    s$p2l<-sinh(s$x)
    s$x<-NULL
    return(s)
  }
)
d<-do.call("rbind",a)
#impact in level
(sum(d$p1l,na.rm=T)-sum(d$p2l,na.rm=T))/1000^2 
## [1] -9512.9

Prediction with manipulated estimates

d<-d.fit
e<-e.base
# reduce effect size by 1.67 to the lower bound of the 90% conf. interval
est<-coefficients(e)
est["d.treat"]<-est["d.treat"] + 1.645 * sqrt(diag(vcov(e)))["d.treat"]
x<-d[,c("d.treat","d.asinh.gdppc_l1","d.asinh.nibamafines_no_l1")]
d$p2 <- c(as.matrix(x) %*% est + d[,"fe.year"]   )
# prediction in level 
a<-lapply(
  split(d,d$id),
  function(s){
    # s<-split(d,d$id)[[1]]
      s[2,"x"]<-s[1,"asinh.def_area"]+s[2,"p2"]
    for (i in 3:nrow(s)){
      s[i,"x"]<-s[(i-1),"x"]+s[i,"p2"]
    }
    s$p2l<-sinh(s$x)
    s$x<-NULL
    return(s)
  }
)
d<-do.call("rbind",a)
#impact in level
(sum(d$p1l,na.rm=T)-sum(d$p2l,na.rm=T))/1000^2 
## [1] -2718.87

Out of sample prediction

Note: Predictions after FD estimations only need year fixed effects. That means, that it is possible to predict out of sample into new observations.