Data collected at the Osservatorio Economico della Sardegna

Enterprises are sorted by their score (the variable p)

source("read-dat.R")
head(dat)
##                 i1          i2       i3 i4         p z d ey0 ey3  py0  py3 dy0
## 276/99   0.3000000 0.000000000 1.000000 19 -4.645011 0 0  10  NA 3616   NA  NA
## 343/99   0.3000000 0.002730963 1.000000 21 -3.606017 0 0  NA  NA   NA  315  NA
## 257/99   0.3000000 0.006521739 1.000000 23 -3.293505 0 0  NA  NA   NA   NA  NA
## D1500264 0.3000344 0.001377885 1.000000 22 -3.237792 0 1  18  21 7983 9409  NA
## 307/99   0.3214286 0.011127107 1.176471 21 -3.173211 0 0   1  NA  484   NA  NA
## D1500228 0.3227699 0.003129890 1.052632 23 -3.114608 0 0   2  NA  269  257  NA
##          dpy3
## 276/99     NA
## 343/99     NA
## 257/99     NA
## D1500264   NA
## 307/99     NA
## D1500228   NA

X.plus and X.minus estimators

These functions return the "+" (.plus) and "-" (.minus) terms of the alpha.hat formula: each fits a weighted local-linear regression on one side of the threshold and returns the intercept and slope.
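In the notation used below, these pieces assemble into the ratio estimator computed later (see alpha.hat inside bootci):

$$\hat{\alpha}=\frac{\hat{y}^{+}-\hat{y}^{-}}{\hat{d}^{+}-\hat{d}^{-}},$$

where each term is the intercept of a weighted local-linear fit at the threshold, estimated on the mandated (+) or non-mandated (-) side.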

minimi.plus=function(X,S,K,Z){
  # Weighted least squares of X on S with weights K, restricted to the
  # mandated side (Z == 1); returns c(intercept, slope).
  A=sum(K*Z*X)
  B=sum(K*Z)
  C=sum(K*Z*S)
  D=sum(K*Z*S*X)
  E=sum(K*Z*(S^2))
  b.plus=(A*C-B*D)/(C^2-B*E)
  x.plus=(A-b.plus*C)/B
  return(c(x.plus,b.plus))}

minimi.minus=function(X,S,K,Z){
  # Same as minimi.plus, on the non-mandated side (Z == 0).
  A=sum(K*(1-Z)*X)
  B=sum(K*(1-Z))
  C=sum(K*(1-Z)*S)
  D=sum(K*(1-Z)*S*X)
  E=sum(K*(1-Z)*(S^2))
  b.minus=(A*C-B*D)/(C^2-B*E)
  x.minus=(A-b.minus*C)/B
  return(c(x.minus,b.minus))}
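A quick sanity check (a sketch added for exposition, not part of the original analysis): the five sums above are exactly the weighted least-squares normal equations, so minimi.plus should agree with lm() fitted with weights K on the Z == 1 subsample.

set.seed(1)
X=rnorm(50); S=rnorm(50); K=runif(50); Z=rbinom(50,1,0.5)
fit=lm(X ~ S, weights=K, subset=Z==1)
all.equal(unname(coef(fit)), minimi.plus(X,S,K,Z))  # TRUE, up to numerical tolerance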

The Bootstrap

This function returns calibrated bootstrap confidence intervals, i.e., intervals whose nominal coverage corresponds to the actual coverage. It wraps bcajack() (BCa intervals, from the bcaboot package) and returns the lower and upper limits at the 99%, 95%, and 90% levels.

bootci=function(y,p,K,z,d,M=1000,init.seed=NULL){
    # Calibrated (BCa) bootstrap CIs via bcajack() from the bcaboot package
    # (assumed loaded upstream)
    if(!is.null(init.seed)) set.seed(init.seed)
    alpha.hat=function(bdat){
      y=bdat$y
      p=bdat$p
      K=bdat$K
      z=bdat$z
      d=bdat$d
      alpha.hat=(minimi.plus(y,p,K,z)[1]-minimi.minus(y,p,K,z)[1])/
        (minimi.plus(d,p,K,z)[1]-minimi.minus(d,p,K,z)[1])
      return(alpha.hat)
    }
    bdat=data.frame(y=y,p=p,K=K,z=z,d=d)
    res=suppressWarnings(bcajack(x = bdat, B = M, func = alpha.hat, mr = 40,
            alpha = c(0.005,0.025,0.05),verbose = FALSE))
    # Lower/upper limits at the 99%, 95% and 90% levels, in that order
    return(c(res$lims[c(1,7),1],res$lims[c(2,6),1],
             res$lims[c(3,5),1]))
    }

Analysis with the Bootstrap

Here we reproduce the results reported in the paper.

attach(dat)
p.bar=-0.41067715   # Threshold selection
n=length(p);n # Sample size used to estimate the weighting function
## [1] 215
p=p-p.bar   # Centered score
dens=density(p) # Kernel density estimate of the score
K=rep(NA,n)
# Weight each unit by the average of the two density grid values bracketing
# its score, then normalize the weights to sum to one
for(i in 1:n) K[i]=mean(c(dens$y[dens$x==min(dens$x[dens$x>=p[i]])],
                          dens$y[dens$x==max(dens$x[dens$x<=p[i]])]))
K=K/sum(K)
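An alternative weighting scheme (a sketch, not the method used above): interpolate the estimated density directly at each centered score with approx(), which replaces the grid-bracketing loop; the resulting weights are close but not identical.

K.alt=approx(dens$x, dens$y, xout=p, rule=2)$y  # linear interpolation; rule=2 clamps scores outside the grid
K.alt=K.alt/sum(K.alt)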

# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable]    # Response variable: log-ratio
z.p=(p>0)[usable]     # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
K.p=K[usable] # Weight for available sample
table(z.p,d.p) # Tables 1 and 2
##        d.p
## z.p      0  1
##   FALSE 26 13
##   TRUE   9 45
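# Back-of-the-envelope check (not the paper's estimator): the unweighted
# complier share implied by the table is
# P(d=1|z=TRUE) - P(d=1|z=FALSE) = 45/54 - 13/39 = 0.5,
# close to the kernel-weighted estimate printed below, e.g.:
# tab=table(z.p,d.p); prop.table(tab,1)[2,2]-prop.table(tab,1)[1,2]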
ypiu=minimi.plus(y.p,p.p,K.p,z.p) # y+ estimate
ymeno=minimi.minus(y.p,p.p,K.p,z.p) # y- estimate 
dpiu=minimi.plus(d.p,p.p,K.p,z.p) # d+ estimate
dmeno=minimi.minus(d.p,p.p,K.p,z.p) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers:  0.5822672
alpha.hat.p=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.p
## [1] 0.4444406
exp(alpha.hat.p) # Effect as a multiplicative factor on the original scale
## [1] 1.559618
# Bootstrap confidence intervals for the effect on production value
bootci(y=y.p,p=p.p,K=K.p,z=z.p,d=d.p,M=100000,init.seed=17)
##       0.005       0.995       0.025       0.975        0.05        0.95 
## -0.44523722  3.66504896 -0.19506843  1.77800637 -0.08995075  1.40215145
# Analysis of employment
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable]    # Response variable: log-ratio
z.e=(p>0)[usable]     # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
K.e=K[usable] # Weight for available sample
table(z.e,d.e) # Tables 1 and 2
##        d.e
## z.e      0  1
##   FALSE 22 13
##   TRUE   8 39
ypiu=minimi.plus(y.e,p.e,K.e,z.e) # y+ estimate
ymeno=minimi.minus(y.e,p.e,K.e,z.e) # y- estimate 
dpiu=minimi.plus(d.e,p.e,K.e,z.e) # d+ estimate
dmeno=minimi.minus(d.e,p.e,K.e,z.e) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers:  0.5589882
alpha.hat.e=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.e
## [1] 0.5667232
exp(alpha.hat.e) # Effect as a multiplicative factor on the original scale
## [1] 1.762482
# Bootstrap confidence intervals for the effect on employment
bootci(y=y.e,p=p.e,K=K.e,z=z.e,d=d.e,M=100000,init.seed=17)
##       0.005       0.995       0.025       0.975        0.05        0.95 
## -6.51126505  4.41135983 -0.18208744  2.23276228 -0.05752017  1.72723005
# Figure 1

#pdf(file="results.pdf")
hl=-4
par(mfrow=c(2,1),mar=c(5, 5, 4, 2)-1)
# Figure: results for employment
plot(p.e+p.bar,y.e,pch=21-d.e*2,xlab="p",ylab="y",main="(a) Effect on Occupation",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-treated"),pch=c(19,21))
lines(p.e[p.e>0]+p.bar,ypiu[1]+ypiu[2]*( p.e[p.e>0]+p.bar),lwd=2)
lines(p.e[p.e<0]+p.bar,ymeno[1]+ymeno[2]*( p.e[p.e<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.e/max(K.e)) # grey shades encoding the kernel weights
for(i in 1:(length(p.e)-1)) lines(c(p.e[i]+p.bar, p.e[i+1]+p.bar),c(hl,hl),col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K/max(K))
for(i in c(seq(1,length(p.e),by=8), length(p.e))) text(p.e[i]+p.bar,hl,round(K[i],2),col=tonalita.grigio[i],cex=0.6)
# Figure: results for production value
plot(p.p+p.bar,y.p,pch=21-d.p*2,xlab="p",ylab="y",main="(b) Effect on Production Value",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-treated"),pch=c(19,21))
lines(p.p[p.p>0]+p.bar,ypiu[1]+ypiu[2]*( p.p[p.p>0]+p.bar),lwd=2)
lines(p.p[p.p<0]+p.bar,ymeno[1]+ymeno[2]*( p.p[p.p<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.p/max(K.p)) # grey shades encoding the kernel weights
for(i in 1:(length(p.p)-1)) lines(c(p.p[i]+p.bar, p.p[i+1]+p.bar),c(hl,hl),col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K/max(K))
for(i in c(seq(1,length(p.p),by=8), length(p.p))) text(p.p[i]+p.bar,hl,round(K[i],2),col=tonalita.grigio[i],cex=0.6)

#dev.off()

Bootstrap simulation study for assessing interval calibration under the null

nrep=100

sim.res=data.frame(inf=1,sup=1,lev=0.9,n=20,gamma.shape=1,prop.compliers=0.2,nrep=nrep) # placeholder row, dropped after the loop

for(nsim in c(20,50,100,200,300)){
  for(gamma.shape in c(0.5,1,2,5,10)){
    for(prop.compliers in c(0.2,0.3,0.5,0.6,0.7)){
      for(i in 1:nrep){
          psim=rnorm(nsim)
          zsim=psim>0
          denssim=density(psim) # kernel density
          Ksim=rep(NA,nsim)
          # Interpolate the kernel density at each simulated score
          # (index j, so the replication counter i is not shadowed)
          for(j in 1:nsim) Ksim[j]=mean(
            c(denssim$y[denssim$x==min(denssim$x[denssim$x>=psim[j]])],
              denssim$y[denssim$x==max(denssim$x[denssim$x<=psim[j]])]))
          Ksim=Ksim/sum(Ksim)
          ysim=rgamma(nsim,shape=gamma.shape,rate=1)
          dsim=rep(0,nsim)
          # Redraw until at least two mandated units comply
          while(sum(dsim)<=1){dsim[zsim]=rbinom(sum(zsim),1,prob=prop.compliers)}
          rr=bootci(y=ysim,p=psim,K=Ksim,z=zsim,d=dsim,M=1000)
          intres=rbind(
            data.frame(inf=rr[1],sup=rr[2],lev=0.99),
            data.frame(inf=rr[3],sup=rr[4],lev=0.95),
            data.frame(inf=rr[5],sup=rr[6],lev=0.90))
          intres=cbind(intres,data.frame(n=nsim,
                                         gamma.shape=gamma.shape,
                                         prop.compliers=prop.compliers,nrep=nrep))
          sim.res=rbind(sim.res,intres)
      }
      cat(prop.compliers,gamma.shape,nsim,"\n")
    }
  }
}

sim.res=sim.res[-1,] # drop the placeholder first row

save(sim.res,file="simulres4.RData")
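Reproducibility note: the loops above do not fix a random seed, so simulres4.RData will differ slightly between runs. A minimal sketch for exact reproducibility (an addition, not in the original) is to set one up front:

# set.seed(2024)  # hypothetical seed; place immediately before the nested loops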

Analysis of simulations

library(dplyr) # pipes and verbs used below (assumed available)
load(file="simulres4.RData")
sim.res = sim.res %>% mutate(inside=(inf<0)&(sup>0),int.wide=sup-inf) %>% na.omit()

res=sim.res %>% group_by(lev,n,gamma.shape,prop.compliers) %>% summarise(cov=mean(inside),wide=mean(int.wide)) %>% ungroup()
## `summarise()` has grouped output by 'lev', 'n', 'gamma.shape'. You can override
## using the `.groups` argument.
res1= sim.res %>% 
  group_by(lev,n) %>% 
  summarise(cov=mean(inside),
            sdcov=sqrt(mean(inside)*(1-mean(inside))/length(inside)),
            wide.mean=mean(int.wide),wide.sd=sd(int.wide)/sqrt(length(inside))) %>%
  mutate(min.cov=cov+qnorm(0.025)*sdcov,max.cov=cov-qnorm(0.025)*sdcov,
         min.wide=wide.mean+qnorm(0.025)*wide.sd,max.wide=wide.mean-qnorm(0.025)*wide.sd) %>%
  ungroup()
## `summarise()` has grouped output by 'lev'. You can override using the `.groups`
## argument.
res2= sim.res %>% 
  group_by(gamma.shape,prop.compliers) %>% 
  summarise(cov=mean(inside),
            sdcov=sqrt(mean(inside)*(1-mean(inside))/length(inside)),
            wide.mean=mean(int.wide),wide.sd=sd(int.wide)/sqrt(length(inside))) %>%
  mutate(min.cov=cov+qnorm(0.025)*sdcov,max.cov=cov-qnorm(0.025)*sdcov,
         min.wide=wide.mean+qnorm(0.025)*wide.sd,max.wide=wide.mean-qnorm(0.025)*wide.sd) %>%
  ungroup()
## `summarise()` has grouped output by 'gamma.shape'. You can override using the
## `.groups` argument.
totab1=round(res1 %>% select(lev,n,cov) %>% reshape2::dcast(data=.,lev~n),2)
## Using cov as value column: use value.var to override.
rownames(totab1)=NULL
totab2=round(res2 %>% select(gamma.shape,prop.compliers,cov) %>% mutate(gamma.shape=2/sqrt(gamma.shape)) %>% reshape2::dcast(data=.,gamma.shape~prop.compliers),2) # 2/sqrt(shape) is the gamma skewness
## Using cov as value column: use value.var to override.
rownames(totab2)=NULL
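# The dcast messages above are informational; a behaviorally identical form
# names the value column explicitly, e.g.:
# reshape2::dcast(res1 %>% select(lev,n,cov), lev~n, value.var="cov")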

print(xtable::xtable(totab1,caption="Coverage of Bootstrap C.I. by sample size $n$ (columns) and nominal levels (rows)."),include.rownames=FALSE)
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Wed Mar 20 14:19:14 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrr}
##   \hline
## lev & 20 & 50 & 100 & 200 & 300 \\ 
##   \hline
## 0.90 & 0.82 & 0.86 & 0.88 & 0.88 & 0.88 \\ 
##   0.95 & 0.91 & 0.92 & 0.93 & 0.94 & 0.93 \\ 
##   0.99 & 0.97 & 0.98 & 0.98 & 0.98 & 0.98 \\ 
##    \hline
## \end{tabular}
## \caption{Coverage of Bootstrap C.I. by sample size $n$ (columns) and nominal levels (rows).} 
## \end{table}
print(xtable::xtable(totab2,caption="Coverage of Bootstrap C.I. by proportion of compliers (columns) and asymmetry in $Y$ (rows)."),include.rownames=FALSE)
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Wed Mar 20 14:19:14 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrr}
##   \hline
## gamma.shape & 0.2 & 0.3 & 0.5 & 0.6 & 0.7 \\ 
##   \hline
## 0.63 & 0.94 & 0.94 & 0.94 & 0.94 & 0.95 \\ 
##   0.89 & 0.91 & 0.93 & 0.95 & 0.93 & 0.94 \\ 
##   1.41 & 0.93 & 0.92 & 0.93 & 0.92 & 0.92 \\ 
##   2.00 & 0.94 & 0.93 & 0.92 & 0.92 & 0.92 \\ 
##   2.83 & 0.92 & 0.91 & 0.90 & 0.92 & 0.91 \\ 
##    \hline
## \end{tabular}
## \caption{Coverage of Bootstrap C.I. by proportion of compliers (columns) and asymmetry in $Y$ (rows).} 
## \end{table}

Other analyses

Regression Tree Analysis

library(rpart)
library(rpart.plot)
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
regtree <- rpart(z ~ i1+i2+i3+i4) # covariates come from the attached dat
regtree
## n= 215 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 215 53.7395300 0.50697670  
##    2) i1< 0.3978639 141 30.2695000 0.31205670  
##      4) i4< 27.5 123 22.1626000 0.23577240  
##        8) i3< 1.212522 76  6.3552630 0.09210526  
##         16) i1< 0.3241837 47  0.9787234 0.02127660 *
##         17) i1>=0.3241837 29  4.7586210 0.20689660  
##           34) i3>=1.081113 18  0.9444444 0.05555556 *
##           35) i3< 1.081113 11  2.7272730 0.45454550 *
##        9) i3>=1.212522 47 11.7021300 0.46808510  
##         18) i4< 24.5 28  4.1071430 0.17857140  
##           36) i2< 0.005782026 18  0.9444444 0.05555556 *
##           37) i2>=0.005782026 10  2.4000000 0.40000000 *
##         19) i4>=24.5 19  1.7894740 0.89473680 *
##      5) i4>=27.5 18  2.5000000 0.83333330 *
##    3) i1>=0.3978639 74  7.9054050 0.87837840  
##      6) i4< 25.5 32  6.4687500 0.71875000  
##       12) i1< 0.4254474 10  2.1000000 0.30000000 *
##       13) i1>=0.4254474 22  1.8181820 0.90909090 *
##      7) i4>=25.5 42  0.0000000 1.00000000 *
summary(regtree)
## Call:
## rpart(formula = z ~ i1 + i2 + i3 + i4)
##   n= 215 
## 
##           CP nsplit rel error    xerror        xstd
## 1 0.28963083      0 1.0000000 1.0117439 0.003804685
## 2 0.10433477      1 0.7103692 0.7900188 0.061002975
## 3 0.09221072      2 0.6060344 0.8192376 0.072555246
## 4 0.03709767      4 0.4216130 0.7324996 0.078362684
## 5 0.01586190      6 0.3474176 0.6302829 0.077891653
## 6 0.01419250      8 0.3156938 0.6312773 0.076160895
## 7 0.01000000      9 0.3015013 0.6348525 0.077110544
## 
## Variable importance
## i1 i4 i3 i2 
## 49 29 14  7 
## 
## Node number 1: 215 observations,    complexity param=0.2896308
##   mean=0.5069767, MSE=0.2499513 
##   left son=2 (141 obs) right son=3 (74 obs)
##   Primary splits:
##       i1 < 0.3978639    to the left,  improve=0.28963080, (0 missing)
##       i3 < 1.162948     to the left,  improve=0.12175900, (0 missing)
##       i4 < 24.5         to the left,  improve=0.11420310, (0 missing)
##       i2 < 0.01909164   to the left,  improve=0.03893459, (0 missing)
##   Surrogate splits:
##       i3 < 1.40873      to the left,  agree=0.665, adj=0.027, (0 split)
##       i2 < 0.02380738   to the left,  agree=0.660, adj=0.014, (0 split)
## 
## Node number 2: 141 observations,    complexity param=0.1043348
##   mean=0.3120567, MSE=0.2146773 
##   left son=4 (123 obs) right son=5 (18 obs)
##   Primary splits:
##       i4 < 27.5         to the left,  improve=0.1852327, (0 missing)
##       i3 < 1.162948     to the left,  improve=0.1219094, (0 missing)
##       i2 < 0.01982193   to the left,  improve=0.0399378, (0 missing)
##       i1 < 0.3007355    to the left,  improve=0.0366842, (0 missing)
## 
## Node number 3: 74 observations,    complexity param=0.03709767
##   mean=0.8783784, MSE=0.1068298 
##   left son=6 (32 obs) right son=7 (42 obs)
##   Primary splits:
##       i4 < 25.5         to the left,  improve=0.18173080, (0 missing)
##       i1 < 0.4186274    to the left,  improve=0.16624200, (0 missing)
##       i3 < 1.087085     to the left,  improve=0.09042496, (0 missing)
##       i2 < 0.004128366  to the left,  improve=0.05160640, (0 missing)
##   Surrogate splits:
##       i2 < 0.005751243  to the right, agree=0.635, adj=0.156, (0 split)
##       i3 < 1.03652      to the left,  agree=0.608, adj=0.094, (0 split)
##       i1 < 0.712        to the right, agree=0.595, adj=0.063, (0 split)
## 
## Node number 4: 123 observations,    complexity param=0.09221072
##   mean=0.2357724, MSE=0.1801838 
##   left son=8 (76 obs) right son=9 (47 obs)
##   Primary splits:
##       i3 < 1.212522     to the left,  improve=0.18523140, (0 missing)
##       i4 < 24.5         to the left,  improve=0.09953909, (0 missing)
##       i2 < 0.0004058315 to the left,  improve=0.03962522, (0 missing)
##       i1 < 0.3229234    to the left,  improve=0.03217129, (0 missing)
##   Surrogate splits:
##       i1 < 0.3571322    to the left,  agree=0.732, adj=0.298, (0 split)
##       i4 < 23.5         to the right, agree=0.642, adj=0.064, (0 split)
##       i2 < 0.0294965    to the left,  agree=0.626, adj=0.021, (0 split)
## 
## Node number 5: 18 observations
##   mean=0.8333333, MSE=0.1388889 
## 
## Node number 6: 32 observations,    complexity param=0.03709767
##   mean=0.71875, MSE=0.2021484 
##   left son=12 (10 obs) right son=13 (22 obs)
##   Primary splits:
##       i1 < 0.4254474    to the left,  improve=0.39429070, (0 missing)
##       i2 < 0.004236634  to the left,  improve=0.27085350, (0 missing)
##       i3 < 1.162948     to the left,  improve=0.18411160, (0 missing)
##       i4 < 24.5         to the right, improve=0.01449275, (0 missing)
## 
## Node number 7: 42 observations
##   mean=1, MSE=0 
## 
## Node number 8: 76 observations,    complexity param=0.0158619
##   mean=0.09210526, MSE=0.08362188 
##   left son=16 (47 obs) right son=17 (29 obs)
##   Primary splits:
##       i1 < 0.3241837    to the left,  improve=0.09722950, (0 missing)
##       i2 < 0.002904424  to the left,  improve=0.06616257, (0 missing)
##       i4 < 26.5         to the left,  improve=0.06283366, (0 missing)
##       i3 < 1.081113     to the right, improve=0.01247863, (0 missing)
##   Surrogate splits:
##       i3 < 1.058231     to the left,  agree=0.645, adj=0.069, (0 split)
## 
## Node number 9: 47 observations,    complexity param=0.09221072
##   mean=0.4680851, MSE=0.2489814 
##   left son=18 (28 obs) right son=19 (19 obs)
##   Primary splits:
##       i4 < 24.5         to the left,  improve=0.49610730, (0 missing)
##       i1 < 0.3037437    to the right, improve=0.09123339, (0 missing)
##       i2 < 0.004423548  to the right, improve=0.06321258, (0 missing)
##       i3 < 1.360607     to the left,  improve=0.05204644, (0 missing)
##   Surrogate splits:
##       i2 < 0.004423548  to the right, agree=0.723, adj=0.316, (0 split)
##       i1 < 0.3430486    to the right, agree=0.702, adj=0.263, (0 split)
##       i3 < 1.547619     to the left,  agree=0.638, adj=0.105, (0 split)
## 
## Node number 12: 10 observations
##   mean=0.3, MSE=0.21 
## 
## Node number 13: 22 observations
##   mean=0.9090909, MSE=0.08264463 
## 
## Node number 16: 47 observations
##   mean=0.0212766, MSE=0.0208239 
## 
## Node number 17: 29 observations,    complexity param=0.0158619
##   mean=0.2068966, MSE=0.1640904 
##   left son=34 (18 obs) right son=35 (11 obs)
##   Primary splits:
##       i3 < 1.081113     to the right, improve=0.22840730, (0 missing)
##       i2 < 0.002785339  to the left,  improve=0.13729980, (0 missing)
##       i4 < 24.5         to the left,  improve=0.13729980, (0 missing)
##       i1 < 0.3412005    to the right, improve=0.09528515, (0 missing)
##   Surrogate splits:
##       i1 < 0.3556962    to the left,  agree=0.793, adj=0.455, (0 split)
##       i2 < 0.01772197   to the left,  agree=0.690, adj=0.182, (0 split)
##       i4 < 23.5         to the right, agree=0.655, adj=0.091, (0 split)
## 
## Node number 18: 28 observations,    complexity param=0.0141925
##   mean=0.1785714, MSE=0.1466837 
##   left son=36 (18 obs) right son=37 (10 obs)
##   Primary splits:
##       i2 < 0.005782026  to the left,  improve=0.18570050, (0 missing)
##       i4 < 23.5         to the left,  improve=0.15108110, (0 missing)
##       i3 < 1.37012      to the left,  improve=0.14202900, (0 missing)
##       i1 < 0.3701228    to the left,  improve=0.02608696, (0 missing)
##   Surrogate splits:
##       i1 < 0.3222502    to the right, agree=0.75, adj=0.3, (0 split)
## 
## Node number 19: 19 observations
##   mean=0.8947368, MSE=0.09418283 
## 
## Node number 34: 18 observations
##   mean=0.05555556, MSE=0.05246914 
## 
## Node number 35: 11 observations
##   mean=0.4545455, MSE=0.2479339 
## 
## Node number 36: 18 observations
##   mean=0.05555556, MSE=0.05246914 
## 
## Node number 37: 10 observations
##   mean=0.4, MSE=0.24
pdf(file="results-rt.pdf")
fancyRpartPlot(regtree,main="Regression Tree: z ~ I1+I2+I3+I4",sub = "")
dev.off()
## png 
##   2

Analysis with BART

#pdf(file="results-bart.pdf")
par(mfrow=c(2,1))
# Analysis of employment
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable]    # Response variable: log-ratio
z.e=(p>0)[usable]     # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.e,d.e,p.e))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Flipping the mandated status (column 1, z.e) for counterfactual predictions
y.e=as.double(y.e)
# bart() with this interface comes from the BayesTree/dbarts family (assumed loaded upstream)
bartFit = bart(x.train=x.insample,y.train=y.e,x.test=x.outsample,ndpost=10000,nskip=1000)
## 
## 
## Running BART with numeric y
## 
## number of trees: 200
## Prior:
##  k: 2.000000
##  degrees of freedom in sigma prior: 3
##  quantile in sigma prior: 0.900000
##  power and base for tree prior: 2.000000 0.950000
##  use quantiles for rule cut points: 0
## data:
##  number of training observations: 82
##  number of test observations: 82
##  number of explanatory variables: 3
## 
## 
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100) 
## 
## 
## Running mcmc loop:
## iteration: 100 (of 11000)
## [... iterations 200 through 10900 elided ...]
## iteration: 11000 (of 11000)
## time for loop: 20
## 
## Tree sizes, last iteration:
## 2 2 2 3 2 2 3 2 2 2 2 2 1 2 2 2 2 2 4 3 
## 2 2 2 3 2 2 3 2 1 2 2 3 2 2 2 2 2 2 2 4 
## 1 2 2 1 2 2 2 3 2 2 2 1 2 2 2 2 2 2 2 3 
## 3 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 3 2 1 
## 2 2 2 4 3 2 3 2 3 2 2 2 3 2 2 2 2 2 2 2 
## 2 4 2 3 2 2 2 2 2 2 2 2 2 3 1 2 3 2 2 2 
## 2 2 3 2 5 2 2 2 2 2 3 2 2 2 2 3 2 2 2 2 
## 2 2 2 2 3 2 1 4 2 3 4 2 2 2 2 2 2 2 2 2 
## 2 2 2 2 2 2 2 3 2 1 2 3 3 3 3 2 2 3 1 2 
## 2 2 2 1 2 2 2 2 3 3 2 2 2 2 3 2 3 4 2 2 
## Variable Usage, last iteration (var:count):
## (1: 61) (2: 76) (3: 102) 
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test) # factual | counterfactual posterior draws
factual=which(d.e==1)              # treated units, factual (training) columns
cfactual=length(y.e)+which(d.e==0) # untreated units, counterfactual (test) columns
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual])) # per-draw effect
hist(post.effect.marg,nclass=50,prob=TRUE,main="(a) Posterior Effect on \n Occupation",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.9292
mean(post.effect.marg)
## [1] 0.4207876
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
##       2.5%      97.5% 
## -0.1428711  0.9954635
exp(qe)
##      2.5%     97.5% 
## 0.8668658 2.7059782
abline(v=qe,lty=2)

quantile(post.effect.marg,p=c(0.005,0.995))
##       0.5%      99.5% 
## -0.3218183  1.1744883
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
##      0.5%     99.5% 
## 0.7248299 3.2364863
quantile(post.effect.marg,p=c(0.025,0.975))
##       2.5%      97.5% 
## -0.1428711  0.9954635
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
##      2.5%     97.5% 
## 0.8668658 2.7059782
quantile(post.effect.marg,p=c(0.05,0.95))
##         5%        95% 
## -0.0519006  0.9042703
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
##        5%       95% 
## 0.9494232 2.4701288
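# Helper added for exposition (hypothetical, not in the original): bundles the
# repeated quantile/exp pattern above into one call, returning a credible
# interval on the log scale and on the ratio scale.
post_ci=function(draws,level=0.95){
  q=quantile(draws,probs=c((1-level)/2,1-(1-level)/2))
  rbind(log.scale=q,ratio.scale=exp(q))
}
# e.g. post_ci(post.effect.marg,0.99); post_ci(post.effect.marg,0.95)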
# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable]    # Response variable: log-ratio
z.p=(p>0)[usable]     # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.p,d.p,p.p))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Flipping the mandated status (column 1, z.p) for counterfactual predictions
y.p=as.double(y.p)
bartFit = bart(x.train=x.insample,y.train=y.p,x.test=x.outsample,ndpost=10000,nskip=1000)
## 
## 
## Running BART with numeric y
## 
## number of trees: 200
## Prior:
##  k: 2.000000
##  degrees of freedom in sigma prior: 3
##  quantile in sigma prior: 0.900000
##  power and base for tree prior: 2.000000 0.950000
##  use quantiles for rule cut points: 0
## data:
##  number of training observations: 93
##  number of test observations: 93
##  number of explanatory variables: 3
## 
## 
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100) 
## 
## 
## Running mcmc loop:
## iteration: 100 (of 11000)
## [... iterations 200 through 10900 elided ...]
## iteration: 11000 (of 11000)
## time for loop: 23
## 
## Tree sizes, last iteration:
## 2 2 2 2 2 2 3 3 4 2 4 3 2 2 3 3 1 2 3 3 
## 2 1 2 2 2 1 1 2 2 2 2 2 3 2 2 3 2 2 2 2 
## 2 2 2 3 2 2 2 2 2 2 3 1 3 2 2 3 2 3 3 2 
## 1 3 3 4 2 2 2 2 2 2 3 3 2 2 2 3 2 2 3 1 
## 2 3 2 2 3 2 2 2 2 2 2 2 2 2 2 3 2 2 2 3 
## 2 2 2 2 2 3 4 1 3 2 3 2 2 3 2 2 2 2 3 2 
## 3 1 3 2 2 2 3 3 4 2 1 2 2 1 4 2 3 2 2 4 
## 2 2 2 2 3 2 2 2 4 2 2 2 2 2 2 2 1 2 3 2 
## 2 2 1 2 3 1 2 2 2 2 2 2 2 2 2 3 2 2 3 2 
## 3 2 2 1 2 2 3 2 2 2 2 2 4 2 2 2 2 2 2 2 
## Variable Usage, last iteration (var:count):
## (1: 75) (2: 77) (3: 93) 
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test) # factual | counterfactual posterior draws
factual=which(d.p==1)              # treated units, factual (training) columns
cfactual=length(y.p)+which(d.p==0) # untreated units, counterfactual (test) columns
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
hist(post.effect.marg,nclass=50,prob=TRUE,main="(b) Posterior Effect on \n Production Value",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.8402
mean(post.effect.marg)
## [1] 0.3466506
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
##       2.5%      97.5% 
## -0.3371866  1.0448165
abline(v=qe,lty=2)

quantile(post.effect.marg,p=c(0.005,0.995))
##       0.5%      99.5% 
## -0.5636381  1.2728543
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
##      0.5%     99.5% 
## 0.5691347 3.5710307
quantile(post.effect.marg,p=c(0.025,0.975))
##       2.5%      97.5% 
## -0.3371866  1.0448165
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
##      2.5%     97.5% 
## 0.7137756 2.8428769
quantile(post.effect.marg,p=c(0.05,0.95))
##         5%        95% 
## -0.2394878  0.9306671
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
##       5%      95% 
## 0.787031 2.536200
#dev.off()

Analysis with Causal Forests

library(grf) # provides causal_forest() and average_treatment_effect()
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(ey3/ey0)[usable]    # Response variable: log-ratio
W=d[usable]    # Effective status for available sample
p.e=p[usable] # Score for available sample
X=as.matrix(data.frame(p.e))

crf.e=causal_forest(X=X,Y=Y,W=W,
                  num.trees = 10000,honesty = TRUE)
ate.e=average_treatment_effect(crf.e, target.sample = "overlap")
ate.e=average_treatment_effect(crf.e, target.sample = "overlap")

usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(py3/py0)[usable]    # Response variable: log-ratio
W=d[usable]    # Effective status for available sample
p.p=p[usable] # Score for available sample
X=as.matrix(data.frame(p.p))

crf.p=causal_forest(X=X,Y=Y,W=W,
                    num.trees = 10000,honesty = TRUE)
ate.p=average_treatment_effect(crf.p, target.sample = "overlap")


# Normal-theory CIs for the employment effect at the 99%, 95% and 90% levels
inf=round(ate.e[1]+qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
sup=round(ate.e[1]-qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
cbind(inf,sup)
##        inf  sup
## [1,] -0.28 0.97
## [2,] -0.13 0.82
## [3,] -0.05 0.74
# Normal-theory CIs for the production-value effect at the same levels
inf=round(ate.p[1]+qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
sup=round(ate.p[1]-qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
cbind(inf,sup)
##        inf  sup
## [1,] -0.28 0.91
## [2,] -0.13 0.76
## [3,] -0.06 0.69
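The two confidence-interval blocks above repeat the same normal-theory construction. A compact helper (hypothetical, a sketch rather than part of the original analysis) produces the same tables from any average_treatment_effect() output:

ate_ci=function(ate,levels=c(0.99,0.95,0.90)){
  zq=qnorm((1-levels)/2)   # lower-tail quantiles for each level
  cbind(level=levels,
        inf=round(ate[1]+zq*ate[2],2),
        sup=round(ate[1]-zq*ate[2],2))
}
# ate_ci(ate.e); ate_ci(ate.p)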