Enterprises are sorted by score (p variable)
source("read-dat.R")
head(dat)
## i1 i2 i3 i4 p z d ey0 ey3 py0 py3 dy0
## 276/99 0.3000000 0.000000000 1.000000 19 -4.645011 0 0 10 NA 3616 NA NA
## 343/99 0.3000000 0.002730963 1.000000 21 -3.606017 0 0 NA NA NA 315 NA
## 257/99 0.3000000 0.006521739 1.000000 23 -3.293505 0 0 NA NA NA NA NA
## D1500264 0.3000344 0.001377885 1.000000 22 -3.237792 0 1 18 21 7983 9409 NA
## 307/99 0.3214286 0.011127107 1.176471 21 -3.173211 0 0 1 NA 484 NA NA
## D1500228 0.3227699 0.003129890 1.052632 23 -3.114608 0 0 2 NA 269 257 NA
## dpy3
## 276/99 NA
## 343/99 NA
## 257/99 NA
## D1500264 NA
## 307/99 NA
## D1500228 NA
These Functions returns the + (.plus) and - (.minus) in alpha.hat formula
minimi.plus=function(X,S,K,Z){
A=sum(K*Z*X)
B=sum(K*Z)
C=sum(Z*K*S)
D=sum(Z*K*S*X)
E=sum(K*Z*(S^2))
b.plus=(A*C-B*D)/(C^2-B*E)
x.plus=(A-b.plus*C)/B
return(c(x.plus,b.plus))}
minimi.minus=function(X,S,K,Z){
A=sum(K*(1-Z)*X)
B=sum(K*(1-Z))
C=sum((1-Z)*K*S)
D=sum((1-Z)*K*S*X)
E=sum(K*(1-Z)*(S^2))
b.minus=(A*C-B*D)/(C^2-B*E)
x.minus=(A-b.minus*C)/B
return(c(x.minus,b.minus))}
This function returns Calibrated BootStrap CI that is, the nominal coverage correspond to actua coverage
bootci=function(y,p,K,z,d,M=1000,init.seed=NULL){
if(!is.null(init.seed)) set.seed(init.seed)
alpha.hat=function(bdat){
y=bdat$y
p=bdat$p
K=bdat$K
z=bdat$z
d=bdat$d
alpha.hat=(minimi.plus(y,p,K,z)[1]-minimi.minus(y,p,K,z)[1])/
(minimi.plus(d,p,K,z)[1]-minimi.minus(d,p,K,z)[1])
return(alpha.hat)
}
bdat=data.frame(y=y,p=p,K=K,z=z,d=d)
res=suppressWarnings (bcajack(x = bdat, B = M, func = alpha.hat,
m = nrow(bdat)/20,
alpha = c(0.005,0.025,0.05),verbose = FALSE))
return(c(res$lims[c(1,7),1],res$lims[c(2,6),1],
res$lims[c(3,5),1]))
}
Here we provide the results in the paper.
attach(dat)
p.bar=-0.41067715 # Threshold selection
n=length(p);n # Sample size used to estimate the weithing function
## [1] 215
p=p-p.bar # Centered score
dens=density(p) # kernel density
K=rep(NA,n)
for(i in 1:n) K[i]=mean(c(dens$y[dens$x==min(dens$x[dens$x>=p[i]])],
dens$y[dens$x==max(dens$x[dens$x<=p[i]])]))
K=K/sum(K)
# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable] # Response variale: log-ratio
z.p=(p>0)[usable] # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
K.p=K[usable] # Weight for available sample
table(z.p,d.p) # Tables 1 and 2
## d.p
## z.p 0 1
## FALSE 26 13
## TRUE 9 45
ypiu=minimi.plus(y.p,p.p,K.p,z.p) # y+ estimate
ymeno=minimi.minus(y.p,p.p,K.p,z.p) # y- estimate
dpiu=minimi.plus(d.p,p.p,K.p,z.p) # d+ estimate
dmeno=minimi.minus(d.p,p.p,K.p,z.p) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers: 0.5822739
alpha.hat.p=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.p
## [1] 0.4442994
exp(alpha.hat.p)
## [1] 1.559397
# Boot Confidence Interval for effect on Production Value
bootci(y=y.p,p=p.p,K=K.p,z=z.p,d=d.p,M=100000,init.seed=17)
## 0.005 0.995 0.025 0.975 0.05 0.95
## NA NA NA NA NA NA
# Analysis of Employement
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable] # Response variale: log-ratio
z.e=(p>0)[usable] # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
K.e=K[usable] # Weight for available sample
table(z.e,d.e) # Tables 1 and 2
## d.e
## z.e 0 1
## FALSE 22 13
## TRUE 8 39
ypiu=minimi.plus(y.e,p.e,K.e,z.e) # y+ estimate
ymeno=minimi.minus(y.e,p.e,K.e,z.e) # y- estimate
dpiu=minimi.plus(d.e,p.e,K.e,z.e) # d+ estimate
dmeno=minimi.minus(d.e,p.e,K.e,z.e) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers: 0.5589891
alpha.hat.e=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.e
## [1] 0.5666168
exp(alpha.hat.e)
## [1] 1.762295
# Boot Confidence Interval for effect on Employement
bootci(y=y.e,p=p.e,K=K.e,z=z.e,d=d.e,M=100000,init.seed=17)
## 0.005 0.995 0.025 0.975 0.05 0.95
## -0.87050162 5.68336626 -0.14282487 2.44847531 -0.03279575 1.82225586
# Figure 1
#pdf(file="results.pdf")
hl=-4
par(mfrow=c(2,1),mar=c(5, 5, 4, 2)-1)
# Figure Results for Employement
plot(p.e+p.bar,y.e,pch=21-d.e*2,xlab="p",ylab="y",
main="(a) Effect on Occupation",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-treated"),pch=c(19,21))
lines(p.e[p.e>0]+p.bar,ypiu[1]+ypiu[2]*( p.e[p.e>0]+p.bar),lwd=2)
lines(p.e[p.e<0]+p.bar,ymeno[1]+ymeno[2]*( p.e[p.e<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.e/max(K.e))
for(i in 1:(length(p.e)-1)) lines(c(p.e[i]+p.bar, p.e[i+1]+p.bar),
c(hl,hl),col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K/max(K))
for(i in c(seq(1,length(p.e),by=8),
length(p.e))) text(p.e[i]+p.bar,hl,round(K[i],2),
col=tonalita.grigio[i],cex=0.6)
# Figure Results for Production Value
plot(p.p+p.bar,y.p,pch=21-d.p*2,xlab="p",ylab="y",
main="(b) Effect on Production Value",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-reated"),pch=c(19,21))
lines(p.p[p.p>0]+p.bar,ypiu[1]+ypiu[2]*( p.p[p.p>0]+p.bar),lwd=2)
lines(p.p[p.p<0]+p.bar,ymeno[1]+ymeno[2]*( p.p[p.p<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.p/max(K.p))
for(i in 1:(length(p.p)-1)) lines(c(p.p[i]+p.bar,
p.p[i+1]+p.bar),c(hl,hl),
col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K/max(K))
for(i in c(seq(1,length(p.p),by=8),length(p.p))){
text(p.p[i]+p.bar,hl,round(K[i],2),col=tonalita.grigio[i],cex=0.6)}
#dev.off()
nrep=1000
sim.res=data.frame(NULL)
snsim=c(seq(100,250,by=10))
sgamma.shape=c(1/25,100)
sprop.compliers=c(0.2,0.9)
for(nsim in snsim){
for(gamma.shape in sgamma.shape){
for(prop.compliers in sprop.compliers){
rrp=foreach(i=1:nrep,.combine = "rbind",
.packages = c("bcaboot","BayesTree","grf")) %dopar% {
ids=which(nsim%in%snsim)*100000+
which(gamma.shape%in%sgamma.shape)*10000+
which(prop.compliers%in%sprop.compliers)*1000+i
set.seed(ids)
psim=runif(nsim,-1,1)
psim=psim-mean(psim)
zsim=psim<0
denssim=density(psim) # kernel density
Ksim=rep(NA,nsim)
for(i in 1:nsim) Ksim[i]=mean(
c(denssim$y[denssim$x==min(denssim$x[denssim$x>=psim[i]])],
denssim$y[denssim$x==max(denssim$x[denssim$x<=psim[i]])]))
Ksim=Ksim/sum(Ksim)
ysim=rgamma(nsim,shape=gamma.shape,rate=1)
dsim=0
while((sum(dsim)==0)|(sum(dsim)==nsim)){
dsim=1*(zsim)
ii=sample(which(psim<0),
size=floor(sum(psim<0)*(1-prop.compliers)))
dsim[ii]=1-dsim[ii]
}
rr=bootci(y=log(ysim),p=psim,K=Ksim,z=zsim,d=dsim,M=10000)
intres=rbind(
data.frame(inf=rr[1],sup=rr[2],lev=0.999),
data.frame(inf=rr[3],sup=rr[4],lev=0.99),
data.frame(inf=rr[5],sup=rr[6],lev=0.95),
data.frame(inf=rr[7],sup=rr[8],lev=0.90))
## Analysis with BART
x.insample=as.matrix(data.frame(zsim,dsim,psim))
x.outsample=x.insample
# Changing the Effective Status
x.outsample[,1]=1-x.outsample[,1]
y.e=log(as.double(ysim))
bartFit = bart(x.train=x.insample,y.train=y.e,
x.test=x.outsample,
ndpost=10000,nskip=1000,verbose = FALSE)
yest=cbind(bartFit$yhat.train,bartFit$yhat.test)
factual=which(dsim==1)
cfactual=length(ysim)+which(dsim==0)
pemb=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
intresbart=rbind(
data.frame(inf=quantile(pemb,p=0.0005),sup=quantile(pemb,p=0.9995),
lev=0.999),
data.frame(inf=quantile(pemb,p=0.005),sup=quantile(pemb,p=0.995),
lev=0.99),
data.frame(inf=quantile(pemb,p=0.025),sup=quantile(pemb,p=0.975),
lev=0.95),
data.frame(inf=quantile(pemb,p=0.05),sup=quantile(pemb,p=0.95),
lev=0.90))
## Analysis with Causal Forest
X=as.matrix(data.frame(psim))
crfsim=causal_forest(X=X,Y=log(ysim),W=zsim,
num.trees = 10000,honesty = TRUE)
atesim=average_treatment_effect(crfsim,
target.sample = "overlap")
infcf=round(atesim[1]+
qnorm(c(0.0005,0.005,0.025,0.05))*atesim[2],2)
supcf=round(atesim[1]-
qnorm(c(0.0005,0.005,0.025,0.05))*atesim[2],2)
intrescf=cbind(inf=infcf,
sup=supcf,lev=c(0.999,0.99,0.95,0.9))
tt=factor(c("Boot","Bart","CF")[gl(3,4)])
intresff=cbind(rbind(intres,intresbart,intrescf),
data.frame(ids=ids,n=nsim,
gamma.shape=gamma.shape,
prop.compliers=prop.compliers
,nrep=nrep),technique=tt)
return(intresff)
}
sim.res=rbind(sim.res,rrp)
save(sim.res,file="simulres-all1.RData")
cat(prop.compliers,gamma.shape,nsim,"\n")
}
}
}
sim.res=sim.res[-1,]
save(sim.res,file="simulres-all1.RData")
Analysis of simulations
load(file="simres.RData")
sim.res$technique=factor(as.character(sim.res$technique),
levels=c("Boot","Bart","CF"),
labels=c("Bootstrap","BART","Causal Forest"))
sim.res = sim.res %>%
filter(abs(inf)<Inf,abs(sup)<Inf) %>%
mutate(inside=(inf<0)&(sup>0),int.wide=sup-inf) %>%
filter(int.wide<quantile(int.wide,p=0.99),n>100) %>%
mutate(skewness=2/sqrt(gamma.shape)) %>%
rename(Method=technique)
res1= sim.res %>%
group_by(Method,lev,n,skewness,prop.compliers) %>%
summarise(cov=mean(inside),
sdcov=sqrt(mean(inside)*(1-mean(inside))/length(inside)),
wide.mean=mean(int.wide),wide.sd=sd(int.wide)/sqrt(length(int.wide))) %>%
mutate(min.cov=cov+qnorm(0.95)*sdcov,max.cov=cov-qnorm(0.95)*sdcov,
min.wide=wide.mean+qnorm(0.95)*wide.sd,max.wide=wide.mean-qnorm(0.95)*wide.sd) %>%
ungroup() %>%
filter(lev%in%c(0.95,0.99),prop.compliers%in%c(0.5,0.9)) %>%
mutate(skewness=round(skewness,1))
## `summarise()` has grouped output by 'Method', 'lev', 'n', 'skewness'. You can
## override using the `.groups` argument.
# Custom labels for lev and prop.compliers
custom_labels <- function(variable, value) {
if (variable == "lev") {
return(paste("lev=", round(value*100,0),"%", sep = ""))
}
if (variable == "prop.compliers") {
return(paste("p.comp.=", round(value*100,0),"%", sep = ""))
}
if (variable == "skewness") {
return(paste("skewness=", round(value,1), sep = ""))
}
return(as.character(value))
}
g1=ggplot(aes(x=n,y=cov,ymin=min.cov,ymax=max.cov,color=Method,shape = Method),data=res1)+
geom_pointrange()+
facet_grid(skewness+prop.compliers~lev,scales = "free_y",
labeller = custom_labels)+
geom_hline(data = res1, aes(yintercept = lev), inherit.aes = FALSE,linetype="dotted") +
theme_bw()+ylab("Coverage")
## Warning: The `labeller` API has been updated. Labellers taking `variable` and `value`
## arguments are now deprecated.
## ℹ See labellers documentation.
## Warning in geom_hline(data = res1, aes(yintercept = lev), inherit.aes = FALSE,
## : Ignoring unknown parameters: `inherit.aes`
g1
g2=ggplot(aes(x=n,y=wide.mean,ymin=min.wide,ymax=max.wide,color=Method,shape = Method),data=res1)+
geom_pointrange()+
facet_grid(skewness+prop.compliers~lev,scales = "free_y",
labeller = custom_labels)+
geom_point()+theme_bw()+ylab("Interval Lenght")
## Warning: The `labeller` API has been updated. Labellers taking `variable` and `value`
## arguments are now deprecated.
## ℹ See labellers documentation.
g2
pdf(file="simcov.pdf")
g1
dev.off()
## quartz_off_screen
## 2
pdf(file="simlength.pdf")
g2
dev.off()
## quartz_off_screen
## 2
library(rpart)
library(rpart.plot)
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
regtree <- rpart(z ~ i1+i2+i3+i4)
regtree
## n= 215
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 215 53.7395300 0.50697670
## 2) i1< 0.3978639 141 30.2695000 0.31205670
## 4) i4< 27.5 123 22.1626000 0.23577240
## 8) i3< 1.212522 76 6.3552630 0.09210526
## 16) i1< 0.3241837 47 0.9787234 0.02127660 *
## 17) i1>=0.3241837 29 4.7586210 0.20689660
## 34) i3>=1.081113 18 0.9444444 0.05555556 *
## 35) i3< 1.081113 11 2.7272730 0.45454550 *
## 9) i3>=1.212522 47 11.7021300 0.46808510
## 18) i4< 24.5 28 4.1071430 0.17857140
## 36) i2< 0.005782026 18 0.9444444 0.05555556 *
## 37) i2>=0.005782026 10 2.4000000 0.40000000 *
## 19) i4>=24.5 19 1.7894740 0.89473680 *
## 5) i4>=27.5 18 2.5000000 0.83333330 *
## 3) i1>=0.3978639 74 7.9054050 0.87837840
## 6) i4< 25.5 32 6.4687500 0.71875000
## 12) i1< 0.4254474 10 2.1000000 0.30000000 *
## 13) i1>=0.4254474 22 1.8181820 0.90909090 *
## 7) i4>=25.5 42 0.0000000 1.00000000 *
summary(regtree)
## Call:
## rpart(formula = z ~ i1 + i2 + i3 + i4)
## n= 215
##
## CP nsplit rel error xerror xstd
## 1 0.28963083 0 1.0000000 1.0038491 0.002693989
## 2 0.10433477 1 0.7103692 0.7848672 0.062036591
## 3 0.09221072 2 0.6060344 0.7503427 0.070635492
## 4 0.03709767 4 0.4216130 0.6728993 0.076844316
## 5 0.01586190 6 0.3474176 0.6817847 0.084226305
## 6 0.01419250 8 0.3156938 0.6664058 0.083140186
## 7 0.01000000 9 0.3015013 0.6629894 0.083510572
##
## Variable importance
## i1 i4 i3 i2
## 49 29 14 7
##
## Node number 1: 215 observations, complexity param=0.2896308
## mean=0.5069767, MSE=0.2499513
## left son=2 (141 obs) right son=3 (74 obs)
## Primary splits:
## i1 < 0.3978639 to the left, improve=0.28963080, (0 missing)
## i3 < 1.162948 to the left, improve=0.12175900, (0 missing)
## i4 < 24.5 to the left, improve=0.11420310, (0 missing)
## i2 < 0.01909164 to the left, improve=0.03893459, (0 missing)
## Surrogate splits:
## i3 < 1.40873 to the left, agree=0.665, adj=0.027, (0 split)
## i2 < 0.02380738 to the left, agree=0.660, adj=0.014, (0 split)
##
## Node number 2: 141 observations, complexity param=0.1043348
## mean=0.3120567, MSE=0.2146773
## left son=4 (123 obs) right son=5 (18 obs)
## Primary splits:
## i4 < 27.5 to the left, improve=0.1852327, (0 missing)
## i3 < 1.162948 to the left, improve=0.1219094, (0 missing)
## i2 < 0.01982193 to the left, improve=0.0399378, (0 missing)
## i1 < 0.3007355 to the left, improve=0.0366842, (0 missing)
##
## Node number 3: 74 observations, complexity param=0.03709767
## mean=0.8783784, MSE=0.1068298
## left son=6 (32 obs) right son=7 (42 obs)
## Primary splits:
## i4 < 25.5 to the left, improve=0.18173080, (0 missing)
## i1 < 0.4186274 to the left, improve=0.16624200, (0 missing)
## i3 < 1.087085 to the left, improve=0.09042496, (0 missing)
## i2 < 0.004128366 to the left, improve=0.05160640, (0 missing)
## Surrogate splits:
## i2 < 0.005751243 to the right, agree=0.635, adj=0.156, (0 split)
## i3 < 1.03652 to the left, agree=0.608, adj=0.094, (0 split)
## i1 < 0.712 to the right, agree=0.595, adj=0.063, (0 split)
##
## Node number 4: 123 observations, complexity param=0.09221072
## mean=0.2357724, MSE=0.1801838
## left son=8 (76 obs) right son=9 (47 obs)
## Primary splits:
## i3 < 1.212522 to the left, improve=0.18523140, (0 missing)
## i4 < 24.5 to the left, improve=0.09953909, (0 missing)
## i2 < 0.0004058315 to the left, improve=0.03962522, (0 missing)
## i1 < 0.3229234 to the left, improve=0.03217129, (0 missing)
## Surrogate splits:
## i1 < 0.3571322 to the left, agree=0.732, adj=0.298, (0 split)
## i4 < 23.5 to the right, agree=0.642, adj=0.064, (0 split)
## i2 < 0.0294965 to the left, agree=0.626, adj=0.021, (0 split)
##
## Node number 5: 18 observations
## mean=0.8333333, MSE=0.1388889
##
## Node number 6: 32 observations, complexity param=0.03709767
## mean=0.71875, MSE=0.2021484
## left son=12 (10 obs) right son=13 (22 obs)
## Primary splits:
## i1 < 0.4254474 to the left, improve=0.39429070, (0 missing)
## i2 < 0.004236634 to the left, improve=0.27085350, (0 missing)
## i3 < 1.162948 to the left, improve=0.18411160, (0 missing)
## i4 < 24.5 to the right, improve=0.01449275, (0 missing)
##
## Node number 7: 42 observations
## mean=1, MSE=0
##
## Node number 8: 76 observations, complexity param=0.0158619
## mean=0.09210526, MSE=0.08362188
## left son=16 (47 obs) right son=17 (29 obs)
## Primary splits:
## i1 < 0.3241837 to the left, improve=0.09722950, (0 missing)
## i2 < 0.002904424 to the left, improve=0.06616257, (0 missing)
## i4 < 26.5 to the left, improve=0.06283366, (0 missing)
## i3 < 1.081113 to the right, improve=0.01247863, (0 missing)
## Surrogate splits:
## i3 < 1.058231 to the left, agree=0.645, adj=0.069, (0 split)
##
## Node number 9: 47 observations, complexity param=0.09221072
## mean=0.4680851, MSE=0.2489814
## left son=18 (28 obs) right son=19 (19 obs)
## Primary splits:
## i4 < 24.5 to the left, improve=0.49610730, (0 missing)
## i1 < 0.3037437 to the right, improve=0.09123339, (0 missing)
## i2 < 0.004423548 to the right, improve=0.06321258, (0 missing)
## i3 < 1.360607 to the left, improve=0.05204644, (0 missing)
## Surrogate splits:
## i2 < 0.004423548 to the right, agree=0.723, adj=0.316, (0 split)
## i1 < 0.3430486 to the right, agree=0.702, adj=0.263, (0 split)
## i3 < 1.547619 to the left, agree=0.638, adj=0.105, (0 split)
##
## Node number 12: 10 observations
## mean=0.3, MSE=0.21
##
## Node number 13: 22 observations
## mean=0.9090909, MSE=0.08264463
##
## Node number 16: 47 observations
## mean=0.0212766, MSE=0.0208239
##
## Node number 17: 29 observations, complexity param=0.0158619
## mean=0.2068966, MSE=0.1640904
## left son=34 (18 obs) right son=35 (11 obs)
## Primary splits:
## i3 < 1.081113 to the right, improve=0.22840730, (0 missing)
## i2 < 0.002785339 to the left, improve=0.13729980, (0 missing)
## i4 < 24.5 to the left, improve=0.13729980, (0 missing)
## i1 < 0.3412005 to the right, improve=0.09528515, (0 missing)
## Surrogate splits:
## i1 < 0.3556962 to the left, agree=0.793, adj=0.455, (0 split)
## i2 < 0.01772197 to the left, agree=0.690, adj=0.182, (0 split)
## i4 < 23.5 to the right, agree=0.655, adj=0.091, (0 split)
##
## Node number 18: 28 observations, complexity param=0.0141925
## mean=0.1785714, MSE=0.1466837
## left son=36 (18 obs) right son=37 (10 obs)
## Primary splits:
## i2 < 0.005782026 to the left, improve=0.18570050, (0 missing)
## i4 < 23.5 to the left, improve=0.15108110, (0 missing)
## i3 < 1.37012 to the left, improve=0.14202900, (0 missing)
## i1 < 0.3701228 to the left, improve=0.02608696, (0 missing)
## Surrogate splits:
## i1 < 0.3222502 to the right, agree=0.75, adj=0.3, (0 split)
##
## Node number 19: 19 observations
## mean=0.8947368, MSE=0.09418283
##
## Node number 34: 18 observations
## mean=0.05555556, MSE=0.05246914
##
## Node number 35: 11 observations
## mean=0.4545455, MSE=0.2479339
##
## Node number 36: 18 observations
## mean=0.05555556, MSE=0.05246914
##
## Node number 37: 10 observations
## mean=0.4, MSE=0.24
pdf(file="results-rt.pdf")
fancyRpartPlot(regtree,main="Regression Tree: z ~ I1+I2+I3+I4",sub = "")
dev.off()
## quartz_off_screen
## 2
#pdf(file="results-bart.pdf")
par(mfrow=c(2,1))
# Analysis of Employement
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable] # Response variale: log-ratio
z.e=(p>0)[usable] # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.e,d.e,p.e))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Changing the Effective Status
y.e=as.double(y.e)
bartFit = bart(x.train=x.insample,y.train=y.e,x.test=x.outsample,ndpost=10000,nskip=1000)
##
##
## Running BART with numeric y
##
## number of trees: 200
## Prior:
## k: 2.000000
## degrees of freedom in sigma prior: 3
## quantile in sigma prior: 0.900000
## power and base for tree prior: 2.000000 0.950000
## use quantiles for rule cut points: 0
## data:
## number of training observations: 82
## number of test observations: 82
## number of explanatory variables: 3
##
##
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100)
##
##
## Running mcmc loop:
## iteration: 100 (of 11000)
## iteration: 200 (of 11000)
## iteration: 300 (of 11000)
## iteration: 400 (of 11000)
## iteration: 500 (of 11000)
## iteration: 600 (of 11000)
## iteration: 700 (of 11000)
## iteration: 800 (of 11000)
## iteration: 900 (of 11000)
## iteration: 1000 (of 11000)
## iteration: 1100 (of 11000)
## iteration: 1200 (of 11000)
## iteration: 1300 (of 11000)
## iteration: 1400 (of 11000)
## iteration: 1500 (of 11000)
## iteration: 1600 (of 11000)
## iteration: 1700 (of 11000)
## iteration: 1800 (of 11000)
## iteration: 1900 (of 11000)
## iteration: 2000 (of 11000)
## iteration: 2100 (of 11000)
## iteration: 2200 (of 11000)
## iteration: 2300 (of 11000)
## iteration: 2400 (of 11000)
## iteration: 2500 (of 11000)
## iteration: 2600 (of 11000)
## iteration: 2700 (of 11000)
## iteration: 2800 (of 11000)
## iteration: 2900 (of 11000)
## iteration: 3000 (of 11000)
## iteration: 3100 (of 11000)
## iteration: 3200 (of 11000)
## iteration: 3300 (of 11000)
## iteration: 3400 (of 11000)
## iteration: 3500 (of 11000)
## iteration: 3600 (of 11000)
## iteration: 3700 (of 11000)
## iteration: 3800 (of 11000)
## iteration: 3900 (of 11000)
## iteration: 4000 (of 11000)
## iteration: 4100 (of 11000)
## iteration: 4200 (of 11000)
## iteration: 4300 (of 11000)
## iteration: 4400 (of 11000)
## iteration: 4500 (of 11000)
## iteration: 4600 (of 11000)
## iteration: 4700 (of 11000)
## iteration: 4800 (of 11000)
## iteration: 4900 (of 11000)
## iteration: 5000 (of 11000)
## iteration: 5100 (of 11000)
## iteration: 5200 (of 11000)
## iteration: 5300 (of 11000)
## iteration: 5400 (of 11000)
## iteration: 5500 (of 11000)
## iteration: 5600 (of 11000)
## iteration: 5700 (of 11000)
## iteration: 5800 (of 11000)
## iteration: 5900 (of 11000)
## iteration: 6000 (of 11000)
## iteration: 6100 (of 11000)
## iteration: 6200 (of 11000)
## iteration: 6300 (of 11000)
## iteration: 6400 (of 11000)
## iteration: 6500 (of 11000)
## iteration: 6600 (of 11000)
## iteration: 6700 (of 11000)
## iteration: 6800 (of 11000)
## iteration: 6900 (of 11000)
## iteration: 7000 (of 11000)
## iteration: 7100 (of 11000)
## iteration: 7200 (of 11000)
## iteration: 7300 (of 11000)
## iteration: 7400 (of 11000)
## iteration: 7500 (of 11000)
## iteration: 7600 (of 11000)
## iteration: 7700 (of 11000)
## iteration: 7800 (of 11000)
## iteration: 7900 (of 11000)
## iteration: 8000 (of 11000)
## iteration: 8100 (of 11000)
## iteration: 8200 (of 11000)
## iteration: 8300 (of 11000)
## iteration: 8400 (of 11000)
## iteration: 8500 (of 11000)
## iteration: 8600 (of 11000)
## iteration: 8700 (of 11000)
## iteration: 8800 (of 11000)
## iteration: 8900 (of 11000)
## iteration: 9000 (of 11000)
## iteration: 9100 (of 11000)
## iteration: 9200 (of 11000)
## iteration: 9300 (of 11000)
## iteration: 9400 (of 11000)
## iteration: 9500 (of 11000)
## iteration: 9600 (of 11000)
## iteration: 9700 (of 11000)
## iteration: 9800 (of 11000)
## iteration: 9900 (of 11000)
## iteration: 10000 (of 11000)
## iteration: 10100 (of 11000)
## iteration: 10200 (of 11000)
## iteration: 10300 (of 11000)
## iteration: 10400 (of 11000)
## iteration: 10500 (of 11000)
## iteration: 10600 (of 11000)
## iteration: 10700 (of 11000)
## iteration: 10800 (of 11000)
## iteration: 10900 (of 11000)
## iteration: 11000 (of 11000)
## time for loop: 17
##
## Tree sizes, last iteration:
## 1 3 4 3 2 2 2 3 3 3 2 2 3 2 3 2 2 2 3 2
## 2 2 2 1 3 2 3 2 2 3 2 2 2 2 2 3 2 2 2 2
## 2 2 1 4 2 2 3 2 2 2 1 2 2 2 2 2 2 2 2 3
## 2 2 2 2 3 2 2 2 3 2 2 3 2 3 4 2 2 3 2 4
## 3 3 1 2 3 2 2 2 2 3 2 2 2 2 2 2 2 3 2 2
## 1 2 2 2 2 2 3 2 2 2 2 2 1 2 2 1 3 1 2 2
## 2 2 2 2 2 2 2 3 2 2 2 3 2 2 3 3 4 2 2 3
## 2 2 2 2 2 2 3 2 2 2 4 2 2 2 1 2 2 2 2 1
## 2 2 3 2 3 2 5 2 2 2 2 2 1 2 2 2 3 3 2 2
## 2 2 2 2 2 2 2 2 2 2 3 1 2 2 2 4 2 2 2 4
## Variable Usage, last iteration (var:count):
## (1: 51) (2: 83) (3: 109)
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test)
factual=which(d.e==1)
cfactual=length(y.e)+which(d.e==0)
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
hist(post.effect.marg,nclass=50,prob=TRUE,main="(a) Posterior Effect on \n Occupation",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.9232
mean(post.effect.marg)
## [1] 0.4223907
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
## 2.5% 97.5%
## -0.1602609 0.9951733
exp(qe)
## 2.5% 97.5%
## 0.8519215 2.7051930
abline(v=qe,lty=2)
quantile(post.effect.marg,p=c(0.005,0.995))
## 0.5% 99.5%
## -0.3338747 1.1708917
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
## 0.5% 99.5%
## 0.7161435 3.2248668
quantile(post.effect.marg,p=c(0.025,0.975))
## 2.5% 97.5%
## -0.1602609 0.9951733
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
## 2.5% 97.5%
## 0.8519215 2.7051930
quantile(post.effect.marg,p=c(0.05,0.95))
## 5% 95%
## -0.06165581 0.90305738
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
## 5% 95%
## 0.9402064 2.4671346
# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable] # Response variale: log-ratio
z.p=(p>0)[usable] # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.p,d.p,p.p))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Changing the Effective Status
y.p=as.double(y.p)
bartFit = bart(x.train=x.insample,y.train=y.p,x.test=x.outsample,ndpost=10000,nskip=1000)
##
##
## Running BART with numeric y
##
## number of trees: 200
## Prior:
## k: 2.000000
## degrees of freedom in sigma prior: 3
## quantile in sigma prior: 0.900000
## power and base for tree prior: 2.000000 0.950000
## use quantiles for rule cut points: 0
## data:
## number of training observations: 93
## number of test observations: 93
## number of explanatory variables: 3
##
##
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100)
##
##
## Running mcmc loop:
## iteration: 100 (of 11000)
## iteration: 200 (of 11000)
## iteration: 300 (of 11000)
## iteration: 400 (of 11000)
## iteration: 500 (of 11000)
## iteration: 600 (of 11000)
## iteration: 700 (of 11000)
## iteration: 800 (of 11000)
## iteration: 900 (of 11000)
## iteration: 1000 (of 11000)
## iteration: 1100 (of 11000)
## iteration: 1200 (of 11000)
## iteration: 1300 (of 11000)
## iteration: 1400 (of 11000)
## iteration: 1500 (of 11000)
## iteration: 1600 (of 11000)
## iteration: 1700 (of 11000)
## iteration: 1800 (of 11000)
## iteration: 1900 (of 11000)
## iteration: 2000 (of 11000)
## iteration: 2100 (of 11000)
## iteration: 2200 (of 11000)
## iteration: 2300 (of 11000)
## iteration: 2400 (of 11000)
## iteration: 2500 (of 11000)
## iteration: 2600 (of 11000)
## iteration: 2700 (of 11000)
## iteration: 2800 (of 11000)
## iteration: 2900 (of 11000)
## iteration: 3000 (of 11000)
## iteration: 3100 (of 11000)
## iteration: 3200 (of 11000)
## iteration: 3300 (of 11000)
## iteration: 3400 (of 11000)
## iteration: 3500 (of 11000)
## iteration: 3600 (of 11000)
## iteration: 3700 (of 11000)
## iteration: 3800 (of 11000)
## iteration: 3900 (of 11000)
## iteration: 4000 (of 11000)
## iteration: 4100 (of 11000)
## iteration: 4200 (of 11000)
## iteration: 4300 (of 11000)
## iteration: 4400 (of 11000)
## iteration: 4500 (of 11000)
## iteration: 4600 (of 11000)
## iteration: 4700 (of 11000)
## iteration: 4800 (of 11000)
## iteration: 4900 (of 11000)
## iteration: 5000 (of 11000)
## iteration: 5100 (of 11000)
## iteration: 5200 (of 11000)
## iteration: 5300 (of 11000)
## iteration: 5400 (of 11000)
## iteration: 5500 (of 11000)
## iteration: 5600 (of 11000)
## iteration: 5700 (of 11000)
## iteration: 5800 (of 11000)
## iteration: 5900 (of 11000)
## iteration: 6000 (of 11000)
## iteration: 6100 (of 11000)
## iteration: 6200 (of 11000)
## iteration: 6300 (of 11000)
## iteration: 6400 (of 11000)
## iteration: 6500 (of 11000)
## iteration: 6600 (of 11000)
## iteration: 6700 (of 11000)
## iteration: 6800 (of 11000)
## iteration: 6900 (of 11000)
## iteration: 7000 (of 11000)
## iteration: 7100 (of 11000)
## iteration: 7200 (of 11000)
## iteration: 7300 (of 11000)
## iteration: 7400 (of 11000)
## iteration: 7500 (of 11000)
## iteration: 7600 (of 11000)
## iteration: 7700 (of 11000)
## iteration: 7800 (of 11000)
## iteration: 7900 (of 11000)
## iteration: 8000 (of 11000)
## iteration: 8100 (of 11000)
## iteration: 8200 (of 11000)
## iteration: 8300 (of 11000)
## iteration: 8400 (of 11000)
## iteration: 8500 (of 11000)
## iteration: 8600 (of 11000)
## iteration: 8700 (of 11000)
## iteration: 8800 (of 11000)
## iteration: 8900 (of 11000)
## iteration: 9000 (of 11000)
## iteration: 9100 (of 11000)
## iteration: 9200 (of 11000)
## iteration: 9300 (of 11000)
## iteration: 9400 (of 11000)
## iteration: 9500 (of 11000)
## iteration: 9600 (of 11000)
## iteration: 9700 (of 11000)
## iteration: 9800 (of 11000)
## iteration: 9900 (of 11000)
## iteration: 10000 (of 11000)
## iteration: 10100 (of 11000)
## iteration: 10200 (of 11000)
## iteration: 10300 (of 11000)
## iteration: 10400 (of 11000)
## iteration: 10500 (of 11000)
## iteration: 10600 (of 11000)
## iteration: 10700 (of 11000)
## iteration: 10800 (of 11000)
## iteration: 10900 (of 11000)
## iteration: 11000 (of 11000)
## time for loop: 19
##
## Tree sizes, last iteration:
## 2 2 2 2 2 2 3 2 2 2 2 2 3 2 3 2 2 1 2 2
## 2 2 2 2 2 2 2 2 1 2 2 3 1 2 2 3 3 3 2 2
## 2 2 2 3 2 1 1 2 3 2 2 2 2 2 2 2 2 1 2 2
## 2 1 2 2 1 3 1 1 2 2 2 2 2 2 3 3 2 2 2 2
## 3 2 2 2 2 2 3 2 3 2 3 1 3 2 3 3 2 2 2 3
## 2 3 2 2 2 3 3 2 2 2 3 2 2 2 2 3 1 2 3 3
## 2 4 2 2 2 2 2 4 2 2 3 2 2 2 2 2 2 3 2 4
## 2 1 2 2 2 3 2 2 3 2 2 2 2 2 2 2 3 3 4 4
## 2 2 2 3 4 3 2 2 1 2 3 2 2 2 2 2 3 2 2 2
## 2 1 2 2 2 2 3 2 2 2 2 2 2 2 2 2 2 3 2 2
## Variable Usage, last iteration (var:count):
## (1: 78) (2: 85) (3: 73)
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test)
factual=which(d.p==1)
cfactual=93+which(d.p==0)
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
hist(post.effect.marg,nclass=50,prob=TRUE,main="(b) Posterior Effect on \n Production Value",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.848
mean(post.effect.marg)
## [1] 0.3524183
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
## 2.5% 97.5%
## -0.3151991 1.0502273
abline(v=qe,lty=2)
quantile(post.effect.marg,p=c(0.005,0.995))
## 0.5% 99.5%
## -0.5537272 1.2753641
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
## 0.5% 99.5%
## 0.5748034 3.5800045
quantile(post.effect.marg,p=c(0.025,0.975))
## 2.5% 97.5%
## -0.3151991 1.0502273
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
## 2.5% 97.5%
## 0.7296436 2.8583008
quantile(post.effect.marg,p=c(0.5,0.95))
## 50% 95%
## 0.3486283 0.9402258
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
## 5% 95%
## 0.8115872 2.5605596
#dev.off()
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(ey3/ey0)[usable] # Response variale: log-ratio
W=d[usable] # Effective status for available sample
p.e=p[usable] # Score for available sample
X=as.matrix(data.frame(p.e))
crf.e=causal_forest(X=X,Y=Y,W=W,
num.trees = 10000,honesty = TRUE)
ate.e=average_treatment_effect(crf.e, target.sample = "overlap")
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(py3/py0)[usable] # Response variale: log-ratio
W=d[usable] # Effective status for available sample
p.p=p[usable] # Score for available sample
X=as.matrix(data.frame(p.p))
crf.p=causal_forest(X=X,Y=Y,W=W,
num.trees = 10000,honesty = TRUE)
ate.p=average_treatment_effect(crf.p, target.sample = "overlap")
inf=round(ate.e[1]+qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
sup=round(ate.e[1]-qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
cbind(inf,sup)
## inf sup
## [1,] -0.28 0.96
## [2,] -0.13 0.81
## [3,] -0.05 0.74
inf=round(ate.p[1]+qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
sup=round(ate.p[1]-qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
cbind(inf,sup)
## inf sup
## [1,] -0.28 0.92
## [2,] -0.14 0.77
## [3,] -0.06 0.70