Data collected at the Osservatorio Economico della Sardegna

Enterprises are sorted by score (variable p).

source("read-dat.R")
head(dat)
##                 i1          i2       i3 i4         p z d ey0 ey3  py0  py3 dy0
## 276/99   0.3000000 0.000000000 1.000000 19 -4.645011 0 0  10  NA 3616   NA  NA
## 343/99   0.3000000 0.002730963 1.000000 21 -3.606017 0 0  NA  NA   NA  315  NA
## 257/99   0.3000000 0.006521739 1.000000 23 -3.293505 0 0  NA  NA   NA   NA  NA
## D1500264 0.3000344 0.001377885 1.000000 22 -3.237792 0 1  18  21 7983 9409  NA
## 307/99   0.3214286 0.011127107 1.176471 21 -3.173211 0 0   1  NA  484   NA  NA
## D1500228 0.3227699 0.003129890 1.052632 23 -3.114608 0 0   2  NA  269  257  NA
##          dpy3
## 276/99     NA
## 343/99     NA
## 257/99     NA
## D1500264   NA
## 307/99     NA
## D1500228   NA
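
The chunks below call functions from several add-on packages (bcajack, bart, causal_forest, %dopar%, and the dplyr/ggplot2 pipelines); the corresponding library() calls are not shown here and are presumably issued in read-dat.R or an earlier setup chunk. A sketch of the assumed setup:

# Assumed setup (sketch): packages used by the chunks below
library(bcaboot)    # bcajack, calibrated bootstrap intervals
library(BayesTree)  # bart
library(grf)        # causal_forest, average_treatment_effect
library(foreach)    # %dopar% in the simulation study
library(dplyr)      # data manipulation in the analysis of simulations
library(ggplot2)    # figures g1 and g2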

X.plus and X.minus estimators

These functions return the + (.plus) and - (.minus) terms of the alpha.hat formula.

minimi.plus=function(X,S,K,Z){
  # Weighted sums over the z = 1 side (Z indicator, K kernel weights)
  A=sum(K*Z*X)
  B=sum(K*Z)
  C=sum(Z*K*S)
  D=sum(Z*K*S*X)
  E=sum(K*Z*(S^2))
  # Closed-form weighted least-squares fit of X on S: intercept x.plus, slope b.plus
  b.plus=(A*C-B*D)/(C^2-B*E)
  x.plus=(A-b.plus*C)/B
  return(c(x.plus,b.plus))}

minimi.minus=function(X,S,K,Z){
  # Weighted sums over the z = 0 side
  A=sum(K*(1-Z)*X)
  B=sum(K*(1-Z))
  C=sum((1-Z)*K*S)
  D=sum((1-Z)*K*S*X)
  E=sum(K*(1-Z)*(S^2))
  # Closed-form weighted least-squares fit of X on S: intercept x.minus, slope b.minus
  b.minus=(A*C-B*D)/(C^2-B*E)
  x.minus=(A-b.minus*C)/B
  return(c(x.minus,b.minus))}
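
Both functions are closed-form weighted least-squares fits of X on S on one side of the threshold. A quick sanity check on simulated data, comparing with lm() (a sketch, not part of the original analysis):

# Sketch: the closed form should agree with a weighted linear fit restricted to z = 1
set.seed(1)
ss=runif(50,-1,1); zz=ss>0
xx=0.5+0.3*ss+rnorm(50,sd=0.1)
kk=rep(1/50,50)
coef(lm(xx~ss,weights=kk*zz))   # zero-weight observations do not affect the fit
minimi.plus(xx,ss,kk,zz)        # same intercept and slope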

The Bootstrap

This function returns calibrated bootstrap (BCa) confidence intervals, that is, intervals whose nominal coverage corresponds to the actual coverage.

bootci=function(y,p,K,z,d,M=1000,init.seed=NULL){
    if(!is.null(init.seed)) set.seed(init.seed)
    alpha.hat=function(bdat){
      y=bdat$y
      p=bdat$p
      K=bdat$K
      z=bdat$z
      d=bdat$d
      alpha.hat=(minimi.plus(y,p,K,z)[1]-minimi.minus(y,p,K,z)[1])/
        (minimi.plus(d,p,K,z)[1]-minimi.minus(d,p,K,z)[1])
      return(alpha.hat)
    }
    bdat=data.frame(y=y,p=p,K=K,z=z,d=d)
    res=suppressWarnings (bcajack(x = bdat, B = M, func = alpha.hat, 
                                  m = nrow(bdat)/20,
            alpha = c(0.005,0.025,0.05),verbose = FALSE))
    return(c(res$lims[c(1,7),1],res$lims[c(2,6),1],
             res$lims[c(3,5),1]))
    }

Analysis with the Bootstrap

Here we reproduce the results reported in the paper.

attach(dat)
p.bar=-0.41067715   # Threshold selection
n=length(p);n # Sample size used to estimate the weighting function
## [1] 215
p=p-p.bar   # Centered score
dens=density(p) # kernel density
K=rep(NA,n)
for(i in 1:n) K[i]=mean(c(dens$y[dens$x==min(dens$x[dens$x>=p[i]])],
                          dens$y[dens$x==max(dens$x[dens$x<=p[i]])]))
K=K/sum(K)
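
The loop above averages the two density values bracketing each score; an essentially equivalent, vectorized alternative would interpolate the kernel density with approx(). A sketch, not the code used for the results below (the same remark applies to the analogous loop in the simulation study):

# Sketch: linear interpolation of the kernel density at each score
K.alt=approx(dens$x,dens$y,xout=p)$y
K.alt=K.alt/sum(K.alt)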

# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable]    # Response variable: log-ratio
z.p=(p>0)[usable]     # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
K.p=K[usable] # Weight for available sample
table(z.p,d.p) # Tables 1 and 2
##        d.p
## z.p      0  1
##   FALSE 26 13
##   TRUE   9 45
ypiu=minimi.plus(y.p,p.p,K.p,z.p) # y+ estimate
ymeno=minimi.minus(y.p,p.p,K.p,z.p) # y- estimate 
dpiu=minimi.plus(d.p,p.p,K.p,z.p) # d+ estimate
dmeno=minimi.minus(d.p,p.p,K.p,z.p) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers:  0.5822739
alpha.hat.p=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.p
## [1] 0.4442994
exp(alpha.hat.p)
## [1] 1.559397
# Boot Confidence Interval for effect on Production Value
bootci(y=y.p,p=p.p,K=K.p,z=z.p,d=d.p,M=100000,init.seed=17)
## 0.005 0.995 0.025 0.975  0.05  0.95 
##    NA    NA    NA    NA    NA    NA
# Analysis of Employment
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable]    # Response variable: log-ratio
z.e=(p>0)[usable]     # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
K.e=K[usable] # Weight for available sample
table(z.e,d.e) # Tables 1 and 2
##        d.e
## z.e      0  1
##   FALSE 22 13
##   TRUE   8 39
ypiu=minimi.plus(y.e,p.e,K.e,z.e) # y+ estimate
ymeno=minimi.minus(y.e,p.e,K.e,z.e) # y- estimate 
dpiu=minimi.plus(d.e,p.e,K.e,z.e) # d+ estimate
dmeno=minimi.minus(d.e,p.e,K.e,z.e) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers:  0.5589891
alpha.hat.e=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.e
## [1] 0.5666168
exp(alpha.hat.e)
## [1] 1.762295
# Boot Confidence Interval for effect on Employment
bootci(y=y.e,p=p.e,K=K.e,z=z.e,d=d.e,M=100000,init.seed=17)
##       0.005       0.995       0.025       0.975        0.05        0.95 
## -0.87050162  5.68336626 -0.14282487  2.44847531 -0.03279575  1.82225586
# Figure 1

#pdf(file="results.pdf")
hl=-4
par(mfrow=c(2,1),mar=c(5, 5, 4, 2)-1)
# Figure Results for Employment
plot(p.e+p.bar,y.e,pch=21-d.e*2,xlab="p",ylab="y",
     main="(a) Effect on Occupation",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-treated"),pch=c(19,21))
lines(p.e[p.e>0]+p.bar,ypiu[1]+ypiu[2]*( p.e[p.e>0]+p.bar),lwd=2)
lines(p.e[p.e<0]+p.bar,ymeno[1]+ymeno[2]*( p.e[p.e<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.e/max(K.e))
for(i in 1:(length(p.e)-1)) lines(c(p.e[i]+p.bar, p.e[i+1]+p.bar),
                                  c(hl,hl),col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K/max(K))
for(i in c(seq(1,length(p.e),by=8), 
           length(p.e))) text(p.e[i]+p.bar,hl,round(K[i],2),
                              col=tonalita.grigio[i],cex=0.6)
# Figure Results for Production Value
plot(p.p+p.bar,y.p,pch=21-d.p*2,xlab="p",ylab="y",
     main="(b) Effect on Production Value",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-treated"),pch=c(19,21))
lines(p.p[p.p>0]+p.bar,ypiu[1]+ypiu[2]*( p.p[p.p>0]+p.bar),lwd=2)
lines(p.p[p.p<0]+p.bar,ymeno[1]+ymeno[2]*( p.p[p.p<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.p/max(K.p))
for(i in 1:(length(p.p)-1)) lines(c(p.p[i]+p.bar, 
                                    p.p[i+1]+p.bar),c(hl,hl),
                                  col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K/max(K))
for(i in c(seq(1,length(p.p),by=8),length(p.p))){
  text(p.p[i]+p.bar,hl,round(K[i],2),col=tonalita.grigio[i],cex=0.6)}

#dev.off()

Bootstrap simulation study for assessing interval calibration under the null
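
The simulation loop uses foreach's %dopar%, so a parallel backend must be registered beforehand; that step is not shown in the chunk. A minimal sketch, assuming the doParallel backend:

library(doParallel)
registerDoParallel(cores=max(1,parallel::detectCores()-1))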

nrep=1000

sim.res=data.frame(NULL)
snsim=c(seq(100,250,by=10))
sgamma.shape=c(1/25,100)
sprop.compliers=c(0.2,0.9)
for(nsim in snsim){
  for(gamma.shape in sgamma.shape){
    for(prop.compliers in sprop.compliers){
      
      rrp=foreach(i=1:nrep,.combine = "rbind",
                  .packages = c("bcaboot","BayesTree","grf")) %dopar% {
                    ids=which(nsim%in%snsim)*100000+
                      which(gamma.shape%in%sgamma.shape)*10000+
                      which(prop.compliers%in%sprop.compliers)*1000+i
                    set.seed(ids)
                    psim=runif(nsim,-1,1)
                    psim=psim-mean(psim)
                    zsim=psim<0
                    denssim=density(psim) # kernel density
                    Ksim=rep(NA,nsim)
                    for(i in 1:nsim) Ksim[i]=mean(
                      c(denssim$y[denssim$x==min(denssim$x[denssim$x>=psim[i]])],
                        denssim$y[denssim$x==max(denssim$x[denssim$x<=psim[i]])]))
                    Ksim=Ksim/sum(Ksim)
                    ysim=rgamma(nsim,shape=gamma.shape,rate=1)
                    dsim=0
                    while((sum(dsim)==0)|(sum(dsim)==nsim)){
                      dsim=1*(zsim)
                      ii=sample(which(psim<0),
                                size=floor(sum(psim<0)*(1-prop.compliers)))
                      dsim[ii]=1-dsim[ii]
                    }
                    rr=bootci(y=log(ysim),p=psim,K=Ksim,z=zsim,d=dsim,M=10000)
                    intres=rbind(
                      data.frame(inf=rr[1],sup=rr[2],lev=0.999),
                      data.frame(inf=rr[3],sup=rr[4],lev=0.99),
                      data.frame(inf=rr[5],sup=rr[6],lev=0.95),
                      data.frame(inf=rr[7],sup=rr[8],lev=0.90))
                    
                    ## Analysis with BART
                    x.insample=as.matrix(data.frame(zsim,dsim,psim))
                    x.outsample=x.insample
                    # Changing the Effective Status
                    x.outsample[,1]=1-x.outsample[,1]
                    y.e=log(as.double(ysim))
                    bartFit = bart(x.train=x.insample,y.train=y.e,
                                   x.test=x.outsample,
                                   ndpost=10000,nskip=1000,verbose = FALSE)
                    yest=cbind(bartFit$yhat.train,bartFit$yhat.test)
                    factual=which(dsim==1)
                    cfactual=length(ysim)+which(dsim==0)
                    pemb=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
                    intresbart=rbind(
                      data.frame(inf=quantile(pemb,p=0.0005),sup=quantile(pemb,p=0.9995),
                                 lev=0.999),
                      data.frame(inf=quantile(pemb,p=0.005),sup=quantile(pemb,p=0.995),
                                 lev=0.99),
                      data.frame(inf=quantile(pemb,p=0.025),sup=quantile(pemb,p=0.975),
                                 lev=0.95),
                      data.frame(inf=quantile(pemb,p=0.05),sup=quantile(pemb,p=0.95),
                                 lev=0.90))
                    
                    ## Analysis with Causal Forest
                    X=as.matrix(data.frame(psim))
                    crfsim=causal_forest(X=X,Y=log(ysim),W=zsim,
                                         num.trees = 10000,honesty = TRUE)
                    atesim=average_treatment_effect(crfsim, 
                                                    target.sample = "overlap")
                    infcf=round(atesim[1]+
                                  qnorm(c(0.0005,0.005,0.025,0.05))*atesim[2],2)
                    supcf=round(atesim[1]-
                                  qnorm(c(0.0005,0.005,0.025,0.05))*atesim[2],2)
                    intrescf=cbind(inf=infcf,
                                   sup=supcf,lev=c(0.999,0.99,0.95,0.9))
                    
                    tt=factor(c("Boot","Bart","CF")[gl(3,4)])
                    intresff=cbind(rbind(intres,intresbart,intrescf),
                                   data.frame(ids=ids,n=nsim,
                                              gamma.shape=gamma.shape,
                                              prop.compliers=prop.compliers
                                              ,nrep=nrep),technique=tt)
                    return(intresff)
                  }
      sim.res=rbind(sim.res,rrp)
      save(sim.res,file="simulres-all1.RData")
      cat(prop.compliers,gamma.shape,nsim,"\n")
    }
  }
}

sim.res=sim.res[-1,]

save(sim.res,file="simulres-all1.RData")

Analysis of simulations

load(file="simres.RData")
sim.res$technique=factor(as.character(sim.res$technique),
                         levels=c("Boot","Bart","CF"),
                                 labels=c("Bootstrap","BART","Causal Forest"))

sim.res = sim.res %>% 
  filter(abs(inf)<Inf,abs(sup)<Inf) %>% 
  mutate(inside=(inf<0)&(sup>0),int.wide=sup-inf) %>% 
  filter(int.wide<quantile(int.wide,p=0.99),n>100) %>%
  mutate(skewness=2/sqrt(gamma.shape)) %>%
  rename(Method=technique) 

res1= sim.res %>% 
  group_by(Method,lev,n,skewness,prop.compliers) %>% 
  summarise(cov=mean(inside),
            sdcov=sqrt(mean(inside)*(1-mean(inside))/length(inside)),
            wide.mean=mean(int.wide),wide.sd=sd(int.wide)/sqrt(length(int.wide))) %>%
  mutate(min.cov=cov+qnorm(0.95)*sdcov,max.cov=cov-qnorm(0.95)*sdcov,
         min.wide=wide.mean+qnorm(0.95)*wide.sd,max.wide=wide.mean-qnorm(0.95)*wide.sd) %>%
  ungroup() %>%
  filter(lev%in%c(0.95,0.99),prop.compliers%in%c(0.5,0.9)) %>%
  mutate(skewness=round(skewness,1))
## `summarise()` has grouped output by 'Method', 'lev', 'n', 'skewness'. You can
## override using the `.groups` argument.
# Custom labels for lev and prop.compliers
custom_labels <- function(variable, value) {
  if (variable == "lev") {
    return(paste("lev=", round(value*100,0),"%", sep = ""))
  }
  if (variable == "prop.compliers") {
    return(paste("p.comp.=", round(value*100,0),"%", sep = ""))
  }
  if (variable == "skewness") {
    return(paste("skewness=", round(value,1), sep = ""))
  }
  return(as.character(value))
}
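
As the warnings under g1 and g2 note, labellers taking variable and value arguments are deprecated in current ggplot2. A sketch of an equivalent built with labeller() (an alternative, not the code that produced the figures shown here):

# Sketch: non-deprecated facet labeller (facet values arrive as character strings)
custom_labeller <- labeller(
  lev            = function(x) paste0("lev=", round(as.numeric(x)*100), "%"),
  prop.compliers = function(x) paste0("p.comp.=", round(as.numeric(x)*100), "%"),
  skewness       = function(x) paste0("skewness=", round(as.numeric(x), 1)),
  .default       = label_value
)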


g1=ggplot(aes(x=n,y=cov,ymin=min.cov,ymax=max.cov,color=Method,shape = Method),data=res1)+
  geom_pointrange()+
  facet_grid(skewness+prop.compliers~lev,scales = "free_y",
             labeller = custom_labels)+
  geom_hline(data = res1, aes(yintercept = lev), inherit.aes = FALSE,linetype="dotted") +
  theme_bw()+ylab("Coverage")
## Warning: The `labeller` API has been updated. Labellers taking `variable` and `value`
## arguments are now deprecated.
## ℹ See labellers documentation.
## Warning in geom_hline(data = res1, aes(yintercept = lev), inherit.aes = FALSE,
## : Ignoring unknown parameters: `inherit.aes`
g1

g2=ggplot(aes(x=n,y=wide.mean,ymin=min.wide,ymax=max.wide,color=Method,shape = Method),data=res1)+
  geom_pointrange()+
  facet_grid(skewness+prop.compliers~lev,scales = "free_y",
             labeller = custom_labels)+
  geom_point()+theme_bw()+ylab("Interval Length")
## Warning: The `labeller` API has been updated. Labellers taking `variable` and `value`
## arguments are now deprecated.
## ℹ See labellers documentation.
g2

pdf(file="simcov.pdf")
g1
dev.off()
## quartz_off_screen 
##                 2
pdf(file="simlength.pdf")
g2
dev.off()
## quartz_off_screen 
##                 2

Other analyses

Regression Tree Analysis

library(rpart)
library(rpart.plot)
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
regtree <- rpart(z ~ i1+i2+i3+i4) # variables taken from the attached dat
regtree
## n= 215 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 215 53.7395300 0.50697670  
##    2) i1< 0.3978639 141 30.2695000 0.31205670  
##      4) i4< 27.5 123 22.1626000 0.23577240  
##        8) i3< 1.212522 76  6.3552630 0.09210526  
##         16) i1< 0.3241837 47  0.9787234 0.02127660 *
##         17) i1>=0.3241837 29  4.7586210 0.20689660  
##           34) i3>=1.081113 18  0.9444444 0.05555556 *
##           35) i3< 1.081113 11  2.7272730 0.45454550 *
##        9) i3>=1.212522 47 11.7021300 0.46808510  
##         18) i4< 24.5 28  4.1071430 0.17857140  
##           36) i2< 0.005782026 18  0.9444444 0.05555556 *
##           37) i2>=0.005782026 10  2.4000000 0.40000000 *
##         19) i4>=24.5 19  1.7894740 0.89473680 *
##      5) i4>=27.5 18  2.5000000 0.83333330 *
##    3) i1>=0.3978639 74  7.9054050 0.87837840  
##      6) i4< 25.5 32  6.4687500 0.71875000  
##       12) i1< 0.4254474 10  2.1000000 0.30000000 *
##       13) i1>=0.4254474 22  1.8181820 0.90909090 *
##      7) i4>=25.5 42  0.0000000 1.00000000 *
summary(regtree)
## Call:
## rpart(formula = z ~ i1 + i2 + i3 + i4)
##   n= 215 
## 
##           CP nsplit rel error    xerror        xstd
## 1 0.28963083      0 1.0000000 1.0038491 0.002693989
## 2 0.10433477      1 0.7103692 0.7848672 0.062036591
## 3 0.09221072      2 0.6060344 0.7503427 0.070635492
## 4 0.03709767      4 0.4216130 0.6728993 0.076844316
## 5 0.01586190      6 0.3474176 0.6817847 0.084226305
## 6 0.01419250      8 0.3156938 0.6664058 0.083140186
## 7 0.01000000      9 0.3015013 0.6629894 0.083510572
## 
## Variable importance
## i1 i4 i3 i2 
## 49 29 14  7 
## 
## Node number 1: 215 observations,    complexity param=0.2896308
##   mean=0.5069767, MSE=0.2499513 
##   left son=2 (141 obs) right son=3 (74 obs)
##   Primary splits:
##       i1 < 0.3978639    to the left,  improve=0.28963080, (0 missing)
##       i3 < 1.162948     to the left,  improve=0.12175900, (0 missing)
##       i4 < 24.5         to the left,  improve=0.11420310, (0 missing)
##       i2 < 0.01909164   to the left,  improve=0.03893459, (0 missing)
##   Surrogate splits:
##       i3 < 1.40873      to the left,  agree=0.665, adj=0.027, (0 split)
##       i2 < 0.02380738   to the left,  agree=0.660, adj=0.014, (0 split)
## 
## Node number 2: 141 observations,    complexity param=0.1043348
##   mean=0.3120567, MSE=0.2146773 
##   left son=4 (123 obs) right son=5 (18 obs)
##   Primary splits:
##       i4 < 27.5         to the left,  improve=0.1852327, (0 missing)
##       i3 < 1.162948     to the left,  improve=0.1219094, (0 missing)
##       i2 < 0.01982193   to the left,  improve=0.0399378, (0 missing)
##       i1 < 0.3007355    to the left,  improve=0.0366842, (0 missing)
## 
## Node number 3: 74 observations,    complexity param=0.03709767
##   mean=0.8783784, MSE=0.1068298 
##   left son=6 (32 obs) right son=7 (42 obs)
##   Primary splits:
##       i4 < 25.5         to the left,  improve=0.18173080, (0 missing)
##       i1 < 0.4186274    to the left,  improve=0.16624200, (0 missing)
##       i3 < 1.087085     to the left,  improve=0.09042496, (0 missing)
##       i2 < 0.004128366  to the left,  improve=0.05160640, (0 missing)
##   Surrogate splits:
##       i2 < 0.005751243  to the right, agree=0.635, adj=0.156, (0 split)
##       i3 < 1.03652      to the left,  agree=0.608, adj=0.094, (0 split)
##       i1 < 0.712        to the right, agree=0.595, adj=0.063, (0 split)
## 
## Node number 4: 123 observations,    complexity param=0.09221072
##   mean=0.2357724, MSE=0.1801838 
##   left son=8 (76 obs) right son=9 (47 obs)
##   Primary splits:
##       i3 < 1.212522     to the left,  improve=0.18523140, (0 missing)
##       i4 < 24.5         to the left,  improve=0.09953909, (0 missing)
##       i2 < 0.0004058315 to the left,  improve=0.03962522, (0 missing)
##       i1 < 0.3229234    to the left,  improve=0.03217129, (0 missing)
##   Surrogate splits:
##       i1 < 0.3571322    to the left,  agree=0.732, adj=0.298, (0 split)
##       i4 < 23.5         to the right, agree=0.642, adj=0.064, (0 split)
##       i2 < 0.0294965    to the left,  agree=0.626, adj=0.021, (0 split)
## 
## Node number 5: 18 observations
##   mean=0.8333333, MSE=0.1388889 
## 
## Node number 6: 32 observations,    complexity param=0.03709767
##   mean=0.71875, MSE=0.2021484 
##   left son=12 (10 obs) right son=13 (22 obs)
##   Primary splits:
##       i1 < 0.4254474    to the left,  improve=0.39429070, (0 missing)
##       i2 < 0.004236634  to the left,  improve=0.27085350, (0 missing)
##       i3 < 1.162948     to the left,  improve=0.18411160, (0 missing)
##       i4 < 24.5         to the right, improve=0.01449275, (0 missing)
## 
## Node number 7: 42 observations
##   mean=1, MSE=0 
## 
## Node number 8: 76 observations,    complexity param=0.0158619
##   mean=0.09210526, MSE=0.08362188 
##   left son=16 (47 obs) right son=17 (29 obs)
##   Primary splits:
##       i1 < 0.3241837    to the left,  improve=0.09722950, (0 missing)
##       i2 < 0.002904424  to the left,  improve=0.06616257, (0 missing)
##       i4 < 26.5         to the left,  improve=0.06283366, (0 missing)
##       i3 < 1.081113     to the right, improve=0.01247863, (0 missing)
##   Surrogate splits:
##       i3 < 1.058231     to the left,  agree=0.645, adj=0.069, (0 split)
## 
## Node number 9: 47 observations,    complexity param=0.09221072
##   mean=0.4680851, MSE=0.2489814 
##   left son=18 (28 obs) right son=19 (19 obs)
##   Primary splits:
##       i4 < 24.5         to the left,  improve=0.49610730, (0 missing)
##       i1 < 0.3037437    to the right, improve=0.09123339, (0 missing)
##       i2 < 0.004423548  to the right, improve=0.06321258, (0 missing)
##       i3 < 1.360607     to the left,  improve=0.05204644, (0 missing)
##   Surrogate splits:
##       i2 < 0.004423548  to the right, agree=0.723, adj=0.316, (0 split)
##       i1 < 0.3430486    to the right, agree=0.702, adj=0.263, (0 split)
##       i3 < 1.547619     to the left,  agree=0.638, adj=0.105, (0 split)
## 
## Node number 12: 10 observations
##   mean=0.3, MSE=0.21 
## 
## Node number 13: 22 observations
##   mean=0.9090909, MSE=0.08264463 
## 
## Node number 16: 47 observations
##   mean=0.0212766, MSE=0.0208239 
## 
## Node number 17: 29 observations,    complexity param=0.0158619
##   mean=0.2068966, MSE=0.1640904 
##   left son=34 (18 obs) right son=35 (11 obs)
##   Primary splits:
##       i3 < 1.081113     to the right, improve=0.22840730, (0 missing)
##       i2 < 0.002785339  to the left,  improve=0.13729980, (0 missing)
##       i4 < 24.5         to the left,  improve=0.13729980, (0 missing)
##       i1 < 0.3412005    to the right, improve=0.09528515, (0 missing)
##   Surrogate splits:
##       i1 < 0.3556962    to the left,  agree=0.793, adj=0.455, (0 split)
##       i2 < 0.01772197   to the left,  agree=0.690, adj=0.182, (0 split)
##       i4 < 23.5         to the right, agree=0.655, adj=0.091, (0 split)
## 
## Node number 18: 28 observations,    complexity param=0.0141925
##   mean=0.1785714, MSE=0.1466837 
##   left son=36 (18 obs) right son=37 (10 obs)
##   Primary splits:
##       i2 < 0.005782026  to the left,  improve=0.18570050, (0 missing)
##       i4 < 23.5         to the left,  improve=0.15108110, (0 missing)
##       i3 < 1.37012      to the left,  improve=0.14202900, (0 missing)
##       i1 < 0.3701228    to the left,  improve=0.02608696, (0 missing)
##   Surrogate splits:
##       i1 < 0.3222502    to the right, agree=0.75, adj=0.3, (0 split)
## 
## Node number 19: 19 observations
##   mean=0.8947368, MSE=0.09418283 
## 
## Node number 34: 18 observations
##   mean=0.05555556, MSE=0.05246914 
## 
## Node number 35: 11 observations
##   mean=0.4545455, MSE=0.2479339 
## 
## Node number 36: 18 observations
##   mean=0.05555556, MSE=0.05246914 
## 
## Node number 37: 10 observations
##   mean=0.4, MSE=0.24
pdf(file="results-rt.pdf")
fancyRpartPlot(regtree,main="Regression Tree: z ~ I1+I2+I3+I4",sub = "")
dev.off()
## quartz_off_screen 
##                 2
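
The CP table reported by summary(regtree) could also be used to prune the tree; a sketch of the standard rpart workflow (not part of the original analysis):

# Sketch: prune at the complexity parameter minimizing cross-validated error
cp.best=regtree$cptable[which.min(regtree$cptable[,"xerror"]),"CP"]
regtree.pruned=prune(regtree,cp=cp.best)
regtree.pruned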

Analysis with BART

#pdf(file="results-bart.pdf")
par(mfrow=c(2,1))
# Analysis of Employment
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable]    # Response variable: log-ratio
z.e=(p>0)[usable]     # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.e,d.e,p.e))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Changing the Effective Status
y.e=as.double(y.e)
bartFit = bart(x.train=x.insample,y.train=y.e,x.test=x.outsample,ndpost=10000,nskip=1000)
## 
## 
## Running BART with numeric y
## 
## number of trees: 200
## Prior:
##  k: 2.000000
##  degrees of freedom in sigma prior: 3
##  quantile in sigma prior: 0.900000
##  power and base for tree prior: 2.000000 0.950000
##  use quantiles for rule cut points: 0
## data:
##  number of training observations: 82
##  number of test observations: 82
##  number of explanatory variables: 3
## 
## 
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100) 
## 
## 
## Running mcmc loop:
## iteration: 100 (of 11000)
## iteration: 200 (of 11000)
## [... progress log continues in steps of 100 iterations ...]
## iteration: 10900 (of 11000)
## iteration: 11000 (of 11000)
## time for loop: 17
## 
## Tree sizes, last iteration:
## 1 3 4 3 2 2 2 3 3 3 2 2 3 2 3 2 2 2 3 2 
## 2 2 2 1 3 2 3 2 2 3 2 2 2 2 2 3 2 2 2 2 
## 2 2 1 4 2 2 3 2 2 2 1 2 2 2 2 2 2 2 2 3 
## 2 2 2 2 3 2 2 2 3 2 2 3 2 3 4 2 2 3 2 4 
## 3 3 1 2 3 2 2 2 2 3 2 2 2 2 2 2 2 3 2 2 
## 1 2 2 2 2 2 3 2 2 2 2 2 1 2 2 1 3 1 2 2 
## 2 2 2 2 2 2 2 3 2 2 2 3 2 2 3 3 4 2 2 3 
## 2 2 2 2 2 2 3 2 2 2 4 2 2 2 1 2 2 2 2 1 
## 2 2 3 2 3 2 5 2 2 2 2 2 1 2 2 2 3 3 2 2 
## 2 2 2 2 2 2 2 2 2 2 3 1 2 2 2 4 2 2 2 4 
## Variable Usage, last iteration (var:count):
## (1: 51) (2: 83) (3: 109) 
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test)
factual=which(d.e==1)
cfactual=length(y.e)+which(d.e==0)
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
hist(post.effect.marg,nclass=50,prob=TRUE,main="(a) Posterior Effect on \n Occupation",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.9232
mean(post.effect.marg)
## [1] 0.4223907
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
##       2.5%      97.5% 
## -0.1602609  0.9951733
exp(qe)
##      2.5%     97.5% 
## 0.8519215 2.7051930
abline(v=qe,lty=2)

quantile(post.effect.marg,p=c(0.005,0.995))
##       0.5%      99.5% 
## -0.3338747  1.1708917
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
##      0.5%     99.5% 
## 0.7161435 3.2248668
quantile(post.effect.marg,p=c(0.025,0.975))
##       2.5%      97.5% 
## -0.1602609  0.9951733
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
##      2.5%     97.5% 
## 0.8519215 2.7051930
quantile(post.effect.marg,p=c(0.05,0.95))
##          5%         95% 
## -0.06165581  0.90305738
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
##        5%       95% 
## 0.9402064 2.4671346
# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable]    # Response variable: log-ratio
z.p=(p>0)[usable]     # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.p,d.p,p.p))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Changing the Effective Status
y.p=as.double(y.p)
bartFit = bart(x.train=x.insample,y.train=y.p,x.test=x.outsample,ndpost=10000,nskip=1000)
## 
## 
## Running BART with numeric y
## 
## number of trees: 200
## Prior:
##  k: 2.000000
##  degrees of freedom in sigma prior: 3
##  quantile in sigma prior: 0.900000
##  power and base for tree prior: 2.000000 0.950000
##  use quantiles for rule cut points: 0
## data:
##  number of training observations: 93
##  number of test observations: 93
##  number of explanatory variables: 3
## 
## 
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100) 
## 
## 
## Running mcmc loop:
## iteration: 100 (of 11000)
## iteration: 200 (of 11000)
## [... progress log continues in steps of 100 iterations ...]
## iteration: 10900 (of 11000)
## iteration: 11000 (of 11000)
## time for loop: 19
## 
## Tree sizes, last iteration:
## 2 2 2 2 2 2 3 2 2 2 2 2 3 2 3 2 2 1 2 2 
## 2 2 2 2 2 2 2 2 1 2 2 3 1 2 2 3 3 3 2 2 
## 2 2 2 3 2 1 1 2 3 2 2 2 2 2 2 2 2 1 2 2 
## 2 1 2 2 1 3 1 1 2 2 2 2 2 2 3 3 2 2 2 2 
## 3 2 2 2 2 2 3 2 3 2 3 1 3 2 3 3 2 2 2 3 
## 2 3 2 2 2 3 3 2 2 2 3 2 2 2 2 3 1 2 3 3 
## 2 4 2 2 2 2 2 4 2 2 3 2 2 2 2 2 2 3 2 4 
## 2 1 2 2 2 3 2 2 3 2 2 2 2 2 2 2 3 3 4 4 
## 2 2 2 3 4 3 2 2 1 2 3 2 2 2 2 2 3 2 2 2 
## 2 1 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 3 2 2 
## Variable Usage, last iteration (var:count):
## (1: 78) (2: 85) (3: 73) 
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test)
factual=which(d.p==1)
cfactual=length(y.p)+which(d.p==0)
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
hist(post.effect.marg,nclass=50,prob=TRUE,main="(b) Posterior Effect on \n Production Value",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.848
mean(post.effect.marg)
## [1] 0.3524183
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
##       2.5%      97.5% 
## -0.3151991  1.0502273
abline(v=qe,lty=2)

quantile(post.effect.marg,p=c(0.005,0.995))
##       0.5%      99.5% 
## -0.5537272  1.2753641
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
##      0.5%     99.5% 
## 0.5748034 3.5800045
quantile(post.effect.marg,p=c(0.025,0.975))
##       2.5%      97.5% 
## -0.3151991  1.0502273
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
##      2.5%     97.5% 
## 0.7296436 2.8583008
quantile(post.effect.marg,p=c(0.5,0.95))
##       50%       95% 
## 0.3486283 0.9402258
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
##        5%       95% 
## 0.8115872 2.5605596
#dev.off()

Analysis with Causal Forests

usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(ey3/ey0)[usable]    # Response variable: log-ratio
W=d[usable]    # Effective status for available sample
p.e=p[usable] # Score for available sample
X=as.matrix(data.frame(p.e))

crf.e=causal_forest(X=X,Y=Y,W=W,
                  num.trees = 10000,honesty = TRUE)
ate.e=average_treatment_effect(crf.e, target.sample = "overlap")

usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(py3/py0)[usable]    # Response variable: log-ratio
W=d[usable]    # Effective status for available sample
p.p=p[usable] # Score for available sample
X=as.matrix(data.frame(p.p))

crf.p=causal_forest(X=X,Y=Y,W=W,
                    num.trees = 10000,honesty = TRUE)
ate.p=average_treatment_effect(crf.p, target.sample = "overlap")


inf=round(ate.e[1]+qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
sup=round(ate.e[1]-qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
cbind(inf,sup)
##        inf  sup
## [1,] -0.28 0.96
## [2,] -0.13 0.81
## [3,] -0.05 0.74
inf=round(ate.p[1]+qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
sup=round(ate.p[1]-qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
cbind(inf,sup)
##        inf  sup
## [1,] -0.28 0.92
## [2,] -0.14 0.77
## [3,] -0.06 0.70
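
As with the bootstrap and BART estimates, these limits are on the log scale, so exponentiating gives multiplicative effects on the y2003/y2000 ratio. A sketch (not part of the original output):

# Sketch: back-transform the production-value limits to multiplicative effects
exp(cbind(inf,sup))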