Enterprises are sorted by score (the p variable).
source("read-dat.R")
head(dat)
## i1 i2 i3 i4 p z d ey0 ey3 py0 py3 dy0
## 276/99 0.3000000 0.000000000 1.000000 19 -4.645011 0 0 10 NA 3616 NA NA
## 343/99 0.3000000 0.002730963 1.000000 21 -3.606017 0 0 NA NA NA 315 NA
## 257/99 0.3000000 0.006521739 1.000000 23 -3.293505 0 0 NA NA NA NA NA
## D1500264 0.3000344 0.001377885 1.000000 22 -3.237792 0 1 18 21 7983 9409 NA
## 307/99 0.3214286 0.011127107 1.176471 21 -3.173211 0 0 1 NA 484 NA NA
## D1500228 0.3227699 0.003129890 1.052632 23 -3.114608 0 0 2 NA 269 257 NA
## dpy3
## 276/99 NA
## 343/99 NA
## 257/99 NA
## D1500264 NA
## 307/99 NA
## D1500228 NA
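A quick, illustrative sanity check of the claimed ordering (assuming dat is loaded as above):
all(diff(dat$p) >= 0) # TRUE if enterprises are indeed sorted by score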
These functions return the "+" (.plus) and "-" (.minus) boundary estimates entering the alpha.hat formula: each performs a weighted least-squares fit of X on the score S, with weights K restricted to one side of the threshold (Z = 1 for .plus, Z = 0 for .minus), and returns the intercept and slope.
minimi.plus=function(X,S,K,Z){
  # Weighted least squares of X on S with weights K*Z (right of threshold)
  A=sum(K*Z*X)
  B=sum(K*Z)
  C=sum(Z*K*S)
  D=sum(Z*K*S*X)
  E=sum(K*Z*(S^2))
  b.plus=(A*C-B*D)/(C^2-B*E) # slope
  x.plus=(A-b.plus*C)/B      # intercept: boundary value at S = 0
  return(c(x.plus,b.plus))
}
minimi.minus=function(X,S,K,Z){
  # Same fit with weights K*(1-Z) (left of threshold)
  A=sum(K*(1-Z)*X)
  B=sum(K*(1-Z))
  C=sum((1-Z)*K*S)
  D=sum((1-Z)*K*S*X)
  E=sum(K*(1-Z)*(S^2))
  b.minus=(A*C-B*D)/(C^2-B*E)
  x.minus=(A-b.minus*C)/B
  return(c(x.minus,b.minus))
}
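Since each function minimizes sum(K*Z*(X - a - b*S)^2), its output can be cross-checked against a weighted lm() fit; a minimal sketch on made-up data (xx, ss, kk, zz are illustrative names, not from the paper):
set.seed(42)
ss=rnorm(30); zz=as.numeric(ss>0); kk=rep(1/30,30)
xx=1+0.5*ss+rnorm(30)
minimi.plus(xx,ss,kk,zz)          # (intercept, slope)
coef(lm(xx ~ ss, weights=kk*zz))  # should agree with the line above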
This function returns calibrated bootstrap confidence intervals (BCa, via bcajack from the bcaboot package), i.e., intervals whose nominal coverage corresponds to the actual coverage.
bootci=function(y,p,K,z,d,M=1000,init.seed=NULL){
  if(!is.null(init.seed)) set.seed(init.seed)
  # Statistic to bootstrap: ratio of the boundary jumps in y and d
  alpha.hat=function(bdat){
    y=bdat$y
    p=bdat$p
    K=bdat$K
    z=bdat$z
    d=bdat$d
    alpha.hat=(minimi.plus(y,p,K,z)[1]-minimi.minus(y,p,K,z)[1])/
      (minimi.plus(d,p,K,z)[1]-minimi.minus(d,p,K,z)[1])
    return(alpha.hat)
  }
  bdat=data.frame(y=y,p=p,K=K,z=z,d=d)
  res=suppressWarnings(bcajack(x = bdat, B = M, func = alpha.hat, mr = 40,
                               alpha = c(0.005,0.025,0.05), verbose = FALSE))
  # Return the BCa limits as (0.5%, 99.5%, 2.5%, 97.5%, 5%, 95%)
  return(c(res$lims[c(1,7),1],res$lims[c(2,6),1],
           res$lims[c(3,5),1]))
}
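A minimal smoke test on synthetic data (toy values and a small M for speed; not part of the paper's analysis):
set.seed(1)
ps=rnorm(60); zs=as.numeric(ps>0)
ds=as.numeric(zs==1 & runif(60)<0.6)  # compliers only on the mandated side
ys=rnorm(60)+0.5*ds
ci.toy=bootci(y=ys,p=ps,K=rep(1/60,60),z=zs,d=ds,M=200,init.seed=2)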
Here we reproduce the results reported in the paper.
attach(dat)
p.bar=-0.41067715 # Threshold selection
n=length(p);n # Sample size used to estimate the weighting function
## [1] 215
p=p-p.bar # Centered score
dens=density(p) # kernel density
K=rep(NA,n)
# For each score, average the two density grid values that bracket it
for(i in 1:n) K[i]=mean(c(dens$y[dens$x==min(dens$x[dens$x>=p[i]])],
                          dens$y[dens$x==max(dens$x[dens$x<=p[i]])]))
K=K/sum(K)
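An approximately equivalent, more idiomatic computation uses linear interpolation via approx(); it differs slightly from averaging the two bracketing grid values, so it is shown only as an unused alternative (K2):
K2=approx(dens$x, dens$y, xout=p)$y
K2=K2/sum(K2)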
# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable] # Response variable: log-ratio
z.p=(p>0)[usable] # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
K.p=K[usable] # Weight for available sample
table(z.p,d.p) # Tables 1 and 2
## d.p
## z.p 0 1
## FALSE 26 13
## TRUE 9 45
ypiu=minimi.plus(y.p,p.p,K.p,z.p) # y+ estimate
ymeno=minimi.minus(y.p,p.p,K.p,z.p) # y- estimate
dpiu=minimi.plus(d.p,p.p,K.p,z.p) # d+ estimate
dmeno=minimi.minus(d.p,p.p,K.p,z.p) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers: 0.5822672
alpha.hat.p=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.p
## [1] 0.4444406
exp(alpha.hat.p)
## [1] 1.559618
# Since exp(alpha.hat.p) is about 1.56, for compliers the 2003/2000 production-value ratio is estimated to be about 56% higher
# Bootstrap confidence interval for the effect on production value
bootci(y=y.p,p=p.p,K=K.p,z=z.p,d=d.p,M=100000,init.seed=17)
## 0.005 0.995 0.025 0.975 0.05 0.95
## -0.44523722 3.66504896 -0.19506843 1.77800637 -0.08995075 1.40215145
# Analysis of Employment
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable] # Response variable: log-ratio
z.e=(p>0)[usable] # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
K.e=K[usable] # Weight for available sample
table(z.e,d.e) # Tables 1 and 2
## d.e
## z.e 0 1
## FALSE 22 13
## TRUE 8 39
ypiu=minimi.plus(y.e,p.e,K.e,z.e) # y+ estimate
ymeno=minimi.minus(y.e,p.e,K.e,z.e) # y- estimate
dpiu=minimi.plus(d.e,p.e,K.e,z.e) # d+ estimate
dmeno=minimi.minus(d.e,p.e,K.e,z.e) # d- estimate
cat("Estimated proportion of Compliers: ",dpiu[1]-dmeno[1],"\n")
## Estimated proportion of Compliers: 0.5589882
alpha.hat.e=(ypiu[1]-ymeno[1])/(dpiu[1]-dmeno[1]) # alpha.hat
alpha.hat.e
## [1] 0.5667232
exp(alpha.hat.e)
## [1] 1.762482
# Since exp(alpha.hat.e) is about 1.76, for compliers the 2003/2000 employment ratio is estimated to be about 76% higher
# Bootstrap confidence interval for the effect on Employment
bootci(y=y.e,p=p.e,K=K.e,z=z.e,d=d.e,M=100000,init.seed=17)
## 0.005 0.995 0.025 0.975 0.05 0.95
## -6.51126505 4.41135983 -0.18208744 2.23276228 -0.05752017 1.72723005
# Figure 1
#pdf(file="results.pdf")
hl=-4
par(mfrow=c(2,1),mar=c(5, 5, 4, 2)-1)
# Figure results for Employment
plot(p.e+p.bar,y.e,pch=21-d.e*2,xlab="p",ylab="y",main="(a) Effect on Occupation",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-treated"),pch=c(19,21))
lines(p.e[p.e>0]+p.bar,ypiu[1]+ypiu[2]*( p.e[p.e>0]+p.bar),lwd=2)
lines(p.e[p.e<0]+p.bar,ymeno[1]+ymeno[2]*( p.e[p.e<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.e/max(K.e))
for(i in 1:(length(p.e)-1)) lines(c(p.e[i]+p.bar, p.e[i+1]+p.bar),c(hl,hl),col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K.e/max(K.e)) # grey shades for the weight labels (tonalita.grigio = "grey shades")
for(i in c(seq(1,length(p.e),by=8), length(p.e))) text(p.e[i]+p.bar,hl,round(K.e[i],2),col=tonalita.grigio[i],cex=0.6)
# Figure results for Production Value (recompute the fitted lines for the production sample;
# ypiu/ymeno above still held the employment fit)
ypiu=minimi.plus(y.p,p.p,K.p,z.p)
ymeno=minimi.minus(y.p,p.p,K.p,z.p)
plot(p.p+p.bar,y.p,pch=21-d.p*2,xlab="p",ylab="y",main="(b) Effect on Production Value",ylim=c(hl,5),xlim=c(-3,4))
abline(v=p.bar,lty=2)
abline(h=0)
legend(2,5,c("Treated","Non-reated"),pch=c(19,21))
lines(p.p[p.p>0]+p.bar,ypiu[1]+ypiu[2]*( p.p[p.p>0]+p.bar),lwd=2)
lines(p.p[p.p<0]+p.bar,ymeno[1]+ymeno[2]*( p.p[p.p<0]+p.bar),lwd=2)
tonalita.grigio= grey(1-K.p/max(K.p))
for(i in 1:(length(p.p)-1)) lines(c(p.p[i]+p.bar, p.p[i+1]+p.bar),c(hl,hl),col= tonalita.grigio[i],lwd=12)
tonalita.grigio= grey(K.p/max(K.p)) # grey shades for the weight labels
for(i in c(seq(1,length(p.p),by=8), length(p.p))) text(p.p[i]+p.bar,hl,round(K.p[i],2),col=tonalita.grigio[i],cex=0.6)
#dev.off()
# Simulation study: coverage of the bootstrap confidence intervals
nrep=100
sim.res=data.frame(inf=1,sup=1,lev=0.9,n=20,gamma.shape=1,prop.compliers=0.2,nrep=nrep)
for(nsim in c(20,50,100,200,300)){
  for(gamma.shape in c(0.5,1,2,5,10)){
    for(prop.compliers in c(0.2,0.3,0.5,0.6,0.7)){
      for(irep in 1:nrep){
        psim=rnorm(nsim)
        zsim=psim>0
        denssim=density(psim) # kernel density
        Ksim=rep(NA,nsim)
        for(j in 1:nsim) Ksim[j]=mean(
          c(denssim$y[denssim$x==min(denssim$x[denssim$x>=psim[j]])],
            denssim$y[denssim$x==max(denssim$x[denssim$x<=psim[j]])]))
        Ksim=Ksim/sum(Ksim)
        ysim=rgamma(nsim,shape=gamma.shape,rate=1) # outcome under the null (no effect)
        dsim=rep(0,nsim)
        # Draw compliers on the mandated side; require at least two treated units
        while(sum(dsim)<=1){dsim[zsim]=rbinom(sum(zsim),1,prob=prop.compliers)}
        rr=bootci(y=ysim,p=psim,K=Ksim,z=zsim,d=dsim,M=1000)
        intres=rbind(
          data.frame(inf=rr[1],sup=rr[2],lev=0.99),
          data.frame(inf=rr[3],sup=rr[4],lev=0.95),
          data.frame(inf=rr[5],sup=rr[6],lev=0.90))
        intres=cbind(intres,data.frame(n=nsim,
                                       gamma.shape=gamma.shape,
                                       prop.compliers=prop.compliers,nrep=nrep))
        sim.res=rbind(sim.res,intres)
      }
      cat(prop.compliers,gamma.shape,nsim,"\n")
    }
  }
}
sim.res=sim.res[-1,] # drop the placeholder first row
save(sim.res,file="simulres4.RData")
Analysis of the simulation results
load(file="simulres4.RData")
sim.res = sim.res %>% mutate(inside=(inf<0)&(sup>0),int.wide=sup-inf) %>% na.omit()
res=sim.res %>% group_by(lev,n,gamma.shape,prop.compliers) %>% summarise(cov=mean(inside),wide=mean(int.wide)) %>% ungroup()
## `summarise()` has grouped output by 'lev', 'n', 'gamma.shape'. You can override
## using the `.groups` argument.
res1= sim.res %>%
group_by(lev,n) %>%
summarise(cov=mean(inside),
sdcov=sqrt(mean(inside)*(1-mean(inside))/length(inside)),
wide.mean=mean(int.wide),wide.sd=sd(int.wide)/sqrt(length(inside))) %>%
mutate(min.cov=cov+qnorm(0.025)*sdcov,max.cov=cov-qnorm(0.025)*sdcov,
min.wide=wide.mean+qnorm(0.025)*wide.sd,max.wide=wide.mean-qnorm(0.025)*wide.sd) %>%
ungroup()
## `summarise()` has grouped output by 'lev'. You can override using the `.groups`
## argument.
res2= sim.res %>%
group_by(gamma.shape,prop.compliers) %>%
summarise(cov=mean(inside),
sdcov=sqrt(mean(inside)*(1-mean(inside))/length(inside)),
wide.mean=mean(int.wide),wide.sd=sd(int.wide)/sqrt(length(inside))) %>%
mutate(min.cov=cov+qnorm(0.025)*sdcov,max.cov=cov-qnorm(0.025)*sdcov,
min.wide=wide.mean+qnorm(0.025)*wide.sd,max.wide=wide.mean-qnorm(0.025)*wide.sd) %>%
ungroup()
## `summarise()` has grouped output by 'gamma.shape'. You can override using the
## `.groups` argument.
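The grouping messages above are informational; they can be silenced by making summarise()'s grouping behaviour explicit via the .groups argument, e.g.:
res=sim.res %>% group_by(lev,n,gamma.shape,prop.compliers) %>%
  summarise(cov=mean(inside),wide=mean(int.wide),.groups="drop")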
totab1=round(res1 %>% select(lev,n,cov) %>% reshape2::dcast(data=.,lev~n),2)
## Using cov as value column: use value.var to override.
rownames(totab1)=NULL
totab2=round(res2 %>% select(gamma.shape,prop.compliers,cov) %>%
  mutate(gamma.shape=2/sqrt(gamma.shape)) %>% # 2/sqrt(shape) is the skewness of a Gamma(shape) distribution
  reshape2::dcast(data=.,gamma.shape~prop.compliers),2)
## Using cov as value column: use value.var to override.
rownames(totab2)=NULL
print(xtable::xtable(totab1,caption="Coverage of Bootstrap C.I. by sample size $n$ (columns) and nominal levels (rows)."),include.rownames=FALSE)
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Wed Mar 20 14:19:14 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrr}
## \hline
## lev & 20 & 50 & 100 & 200 & 300 \\
## \hline
## 0.90 & 0.82 & 0.86 & 0.88 & 0.88 & 0.88 \\
## 0.95 & 0.91 & 0.92 & 0.93 & 0.94 & 0.93 \\
## 0.99 & 0.97 & 0.98 & 0.98 & 0.98 & 0.98 \\
## \hline
## \end{tabular}
## \caption{Coverage of Bootstrap C.I. by sample size $n$ (columns) and nominal levels (rows).}
## \end{table}
print(xtable::xtable(totab2,caption="Coverage of Bootstrap C.I. by proportion of compliers (columns) and asymmetry in $Y$ (rows)."),include.rownames=FALSE)
## % latex table generated in R 4.3.3 by xtable 1.8-4 package
## % Wed Mar 20 14:19:14 2024
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrr}
## \hline
## gamma.shape & 0.2 & 0.3 & 0.5 & 0.6 & 0.7 \\
## \hline
## 0.63 & 0.94 & 0.94 & 0.94 & 0.94 & 0.95 \\
## 0.89 & 0.91 & 0.93 & 0.95 & 0.93 & 0.94 \\
## 1.41 & 0.93 & 0.92 & 0.93 & 0.92 & 0.92 \\
## 2.00 & 0.94 & 0.93 & 0.92 & 0.92 & 0.92 \\
## 2.83 & 0.92 & 0.91 & 0.90 & 0.92 & 0.91 \\
## \hline
## \end{tabular}
## \caption{Coverage of Bootstrap C.I. by proportion of compliers (columns) and asymmetry in $Y$ (rows).}
## \end{table}
library(rpart)
library(rpart.plot)
library(rattle)
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
regtree <- rpart(z ~ i1+i2+i3+i4) # variables come from the attached dat; z is numeric, so this is a regression tree
regtree
## n= 215
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 215 53.7395300 0.50697670
## 2) i1< 0.3978639 141 30.2695000 0.31205670
## 4) i4< 27.5 123 22.1626000 0.23577240
## 8) i3< 1.212522 76 6.3552630 0.09210526
## 16) i1< 0.3241837 47 0.9787234 0.02127660 *
## 17) i1>=0.3241837 29 4.7586210 0.20689660
## 34) i3>=1.081113 18 0.9444444 0.05555556 *
## 35) i3< 1.081113 11 2.7272730 0.45454550 *
## 9) i3>=1.212522 47 11.7021300 0.46808510
## 18) i4< 24.5 28 4.1071430 0.17857140
## 36) i2< 0.005782026 18 0.9444444 0.05555556 *
## 37) i2>=0.005782026 10 2.4000000 0.40000000 *
## 19) i4>=24.5 19 1.7894740 0.89473680 *
## 5) i4>=27.5 18 2.5000000 0.83333330 *
## 3) i1>=0.3978639 74 7.9054050 0.87837840
## 6) i4< 25.5 32 6.4687500 0.71875000
## 12) i1< 0.4254474 10 2.1000000 0.30000000 *
## 13) i1>=0.4254474 22 1.8181820 0.90909090 *
## 7) i4>=25.5 42 0.0000000 1.00000000 *
summary(regtree)
## Call:
## rpart(formula = z ~ i1 + i2 + i3 + i4)
## n= 215
##
## CP nsplit rel error xerror xstd
## 1 0.28963083 0 1.0000000 1.0117439 0.003804685
## 2 0.10433477 1 0.7103692 0.7900188 0.061002975
## 3 0.09221072 2 0.6060344 0.8192376 0.072555246
## 4 0.03709767 4 0.4216130 0.7324996 0.078362684
## 5 0.01586190 6 0.3474176 0.6302829 0.077891653
## 6 0.01419250 8 0.3156938 0.6312773 0.076160895
## 7 0.01000000 9 0.3015013 0.6348525 0.077110544
##
## Variable importance
## i1 i4 i3 i2
## 49 29 14 7
##
## Node number 1: 215 observations, complexity param=0.2896308
## mean=0.5069767, MSE=0.2499513
## left son=2 (141 obs) right son=3 (74 obs)
## Primary splits:
## i1 < 0.3978639 to the left, improve=0.28963080, (0 missing)
## i3 < 1.162948 to the left, improve=0.12175900, (0 missing)
## i4 < 24.5 to the left, improve=0.11420310, (0 missing)
## i2 < 0.01909164 to the left, improve=0.03893459, (0 missing)
## Surrogate splits:
## i3 < 1.40873 to the left, agree=0.665, adj=0.027, (0 split)
## i2 < 0.02380738 to the left, agree=0.660, adj=0.014, (0 split)
##
## Node number 2: 141 observations, complexity param=0.1043348
## mean=0.3120567, MSE=0.2146773
## left son=4 (123 obs) right son=5 (18 obs)
## Primary splits:
## i4 < 27.5 to the left, improve=0.1852327, (0 missing)
## i3 < 1.162948 to the left, improve=0.1219094, (0 missing)
## i2 < 0.01982193 to the left, improve=0.0399378, (0 missing)
## i1 < 0.3007355 to the left, improve=0.0366842, (0 missing)
##
## Node number 3: 74 observations, complexity param=0.03709767
## mean=0.8783784, MSE=0.1068298
## left son=6 (32 obs) right son=7 (42 obs)
## Primary splits:
## i4 < 25.5 to the left, improve=0.18173080, (0 missing)
## i1 < 0.4186274 to the left, improve=0.16624200, (0 missing)
## i3 < 1.087085 to the left, improve=0.09042496, (0 missing)
## i2 < 0.004128366 to the left, improve=0.05160640, (0 missing)
## Surrogate splits:
## i2 < 0.005751243 to the right, agree=0.635, adj=0.156, (0 split)
## i3 < 1.03652 to the left, agree=0.608, adj=0.094, (0 split)
## i1 < 0.712 to the right, agree=0.595, adj=0.063, (0 split)
##
## Node number 4: 123 observations, complexity param=0.09221072
## mean=0.2357724, MSE=0.1801838
## left son=8 (76 obs) right son=9 (47 obs)
## Primary splits:
## i3 < 1.212522 to the left, improve=0.18523140, (0 missing)
## i4 < 24.5 to the left, improve=0.09953909, (0 missing)
## i2 < 0.0004058315 to the left, improve=0.03962522, (0 missing)
## i1 < 0.3229234 to the left, improve=0.03217129, (0 missing)
## Surrogate splits:
## i1 < 0.3571322 to the left, agree=0.732, adj=0.298, (0 split)
## i4 < 23.5 to the right, agree=0.642, adj=0.064, (0 split)
## i2 < 0.0294965 to the left, agree=0.626, adj=0.021, (0 split)
##
## Node number 5: 18 observations
## mean=0.8333333, MSE=0.1388889
##
## Node number 6: 32 observations, complexity param=0.03709767
## mean=0.71875, MSE=0.2021484
## left son=12 (10 obs) right son=13 (22 obs)
## Primary splits:
## i1 < 0.4254474 to the left, improve=0.39429070, (0 missing)
## i2 < 0.004236634 to the left, improve=0.27085350, (0 missing)
## i3 < 1.162948 to the left, improve=0.18411160, (0 missing)
## i4 < 24.5 to the right, improve=0.01449275, (0 missing)
##
## Node number 7: 42 observations
## mean=1, MSE=0
##
## Node number 8: 76 observations, complexity param=0.0158619
## mean=0.09210526, MSE=0.08362188
## left son=16 (47 obs) right son=17 (29 obs)
## Primary splits:
## i1 < 0.3241837 to the left, improve=0.09722950, (0 missing)
## i2 < 0.002904424 to the left, improve=0.06616257, (0 missing)
## i4 < 26.5 to the left, improve=0.06283366, (0 missing)
## i3 < 1.081113 to the right, improve=0.01247863, (0 missing)
## Surrogate splits:
## i3 < 1.058231 to the left, agree=0.645, adj=0.069, (0 split)
##
## Node number 9: 47 observations, complexity param=0.09221072
## mean=0.4680851, MSE=0.2489814
## left son=18 (28 obs) right son=19 (19 obs)
## Primary splits:
## i4 < 24.5 to the left, improve=0.49610730, (0 missing)
## i1 < 0.3037437 to the right, improve=0.09123339, (0 missing)
## i2 < 0.004423548 to the right, improve=0.06321258, (0 missing)
## i3 < 1.360607 to the left, improve=0.05204644, (0 missing)
## Surrogate splits:
## i2 < 0.004423548 to the right, agree=0.723, adj=0.316, (0 split)
## i1 < 0.3430486 to the right, agree=0.702, adj=0.263, (0 split)
## i3 < 1.547619 to the left, agree=0.638, adj=0.105, (0 split)
##
## Node number 12: 10 observations
## mean=0.3, MSE=0.21
##
## Node number 13: 22 observations
## mean=0.9090909, MSE=0.08264463
##
## Node number 16: 47 observations
## mean=0.0212766, MSE=0.0208239
##
## Node number 17: 29 observations, complexity param=0.0158619
## mean=0.2068966, MSE=0.1640904
## left son=34 (18 obs) right son=35 (11 obs)
## Primary splits:
## i3 < 1.081113 to the right, improve=0.22840730, (0 missing)
## i2 < 0.002785339 to the left, improve=0.13729980, (0 missing)
## i4 < 24.5 to the left, improve=0.13729980, (0 missing)
## i1 < 0.3412005 to the right, improve=0.09528515, (0 missing)
## Surrogate splits:
## i1 < 0.3556962 to the left, agree=0.793, adj=0.455, (0 split)
## i2 < 0.01772197 to the left, agree=0.690, adj=0.182, (0 split)
## i4 < 23.5 to the right, agree=0.655, adj=0.091, (0 split)
##
## Node number 18: 28 observations, complexity param=0.0141925
## mean=0.1785714, MSE=0.1466837
## left son=36 (18 obs) right son=37 (10 obs)
## Primary splits:
## i2 < 0.005782026 to the left, improve=0.18570050, (0 missing)
## i4 < 23.5 to the left, improve=0.15108110, (0 missing)
## i3 < 1.37012 to the left, improve=0.14202900, (0 missing)
## i1 < 0.3701228 to the left, improve=0.02608696, (0 missing)
## Surrogate splits:
## i1 < 0.3222502 to the right, agree=0.75, adj=0.3, (0 split)
##
## Node number 19: 19 observations
## mean=0.8947368, MSE=0.09418283
##
## Node number 34: 18 observations
## mean=0.05555556, MSE=0.05246914
##
## Node number 35: 11 observations
## mean=0.4545455, MSE=0.2479339
##
## Node number 36: 18 observations
## mean=0.05555556, MSE=0.05246914
##
## Node number 37: 10 observations
## mean=0.4, MSE=0.24
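If a more parsimonious tree is preferred, the xerror column of the CP table above suggests where to prune; a hedged sketch using rpart's prune(), with the cp value read off the table (chosen here for illustration):
regtree.pruned=prune(regtree,cp=0.016) # keeps the 6-split tree, which minimizes xerror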
pdf(file="results-rt.pdf")
fancyRpartPlot(regtree,main="Regression Tree: z ~ I1+I2+I3+I4",sub = "")
dev.off()
## png
## 2
#pdf(file="results-bart.pdf")
par(mfrow=c(2,1))
# Analysis of Employment
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
y.e=log(ey3/ey0)[usable] # Response variable: log-ratio
z.e=(p>0)[usable] # Mandated status for the available sample
p.e=p[usable] # Score for available sample
d.e=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.e,d.e,p.e))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Flip the mandated status (first column) to build counterfactual covariates
y.e=as.double(y.e)
bartFit = bart(x.train=x.insample,y.train=y.e,x.test=x.outsample,ndpost=10000,nskip=1000)
##
##
## Running BART with numeric y
##
## number of trees: 200
## Prior:
## k: 2.000000
## degrees of freedom in sigma prior: 3
## quantile in sigma prior: 0.900000
## power and base for tree prior: 2.000000 0.950000
## use quantiles for rule cut points: 0
## data:
## number of training observations: 82
## number of test observations: 82
## number of explanatory variables: 3
##
##
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100)
##
##
## Running mcmc loop:
## iteration: 100 (of 11000)
## ...
## iteration: 11000 (of 11000)
## time for loop: 20
##
## Tree sizes, last iteration:
## 2 2 2 3 2 2 3 2 2 2 2 2 1 2 2 2 2 2 4 3
## 2 2 2 3 2 2 3 2 1 2 2 3 2 2 2 2 2 2 2 4
## 1 2 2 1 2 2 2 3 2 2 2 1 2 2 2 2 2 2 2 3
## 3 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 3 2 1
## 2 2 2 4 3 2 3 2 3 2 2 2 3 2 2 2 2 2 2 2
## 2 4 2 3 2 2 2 2 2 2 2 2 2 3 1 2 3 2 2 2
## 2 2 3 2 5 2 2 2 2 2 3 2 2 2 2 3 2 2 2 2
## 2 2 2 2 3 2 1 4 2 3 4 2 2 2 2 2 2 2 2 2
## 2 2 2 2 2 2 2 3 2 1 2 3 3 3 3 2 2 3 1 2
## 2 2 2 1 2 2 2 2 3 3 2 2 2 2 3 2 3 4 2 2
## Variable Usage, last iteration (var:count):
## (1: 61) (2: 76) (3: 102)
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test) # draws x 2n: observed covariates, then flipped-assignment covariates
factual=which(d.e==1) # treated units, observed fits
cfactual=length(y.e)+which(d.e==0) # untreated units, flipped-assignment fits
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual])) # posterior draws of the marginal effect
hist(post.effect.marg,nclass=50,prob=TRUE,main="(a) Posterior Effect on \n Occupation",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.9292
mean(post.effect.marg)
## [1] 0.4207876
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
## 2.5% 97.5%
## -0.1428711 0.9954635
exp(qe)
## 2.5% 97.5%
## 0.8668658 2.7059782
abline(v=qe,lty=2)
quantile(post.effect.marg,p=c(0.005,0.995))
## 0.5% 99.5%
## -0.3218183 1.1744883
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
## 0.5% 99.5%
## 0.7248299 3.2364863
quantile(post.effect.marg,p=c(0.025,0.975))
## 2.5% 97.5%
## -0.1428711 0.9954635
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
## 2.5% 97.5%
## 0.8668658 2.7059782
quantile(post.effect.marg,p=c(0.05,0.95))
## 5% 95%
## -0.0519006 0.9042703
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
## 5% 95%
## 0.9494232 2.4701288
# Analysis of production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
y.p=log(py3/py0)[usable] # Response variable: log-ratio
z.p=(p>0)[usable] # Mandated status for the available sample
p.p=p[usable] # Score for available sample
d.p=d[usable] # Effective status for available sample
x.insample=as.matrix(data.frame(z.p,d.p,p.p))
x.outsample=x.insample
x.outsample[,1]=1-x.outsample[,1] # Flip the mandated status (first column) to build counterfactual covariates
y.p=as.double(y.p)
bartFit = bart(x.train=x.insample,y.train=y.p,x.test=x.outsample,ndpost=10000,nskip=1000)
##
##
## Running BART with numeric y
##
## number of trees: 200
## Prior:
## k: 2.000000
## degrees of freedom in sigma prior: 3
## quantile in sigma prior: 0.900000
## power and base for tree prior: 2.000000 0.950000
## use quantiles for rule cut points: 0
## data:
## number of training observations: 93
## number of test observations: 93
## number of explanatory variables: 3
##
##
## Cutoff rules c in x<=c vs x>c
## Number of cutoffs: (var: number of possible c):
## (1: 100) (2: 100) (3: 100)
##
##
## Running mcmc loop:
## iteration: 100 (of 11000)
## ...
## iteration: 11000 (of 11000)
## time for loop: 23
##
## Tree sizes, last iteration:
## 2 2 2 2 2 2 3 3 4 2 4 3 2 2 3 3 1 2 3 3
## 2 1 2 2 2 1 1 2 2 2 2 2 3 2 2 3 2 2 2 2
## 2 2 2 3 2 2 2 2 2 2 3 1 3 2 2 3 2 3 3 2
## 1 3 3 4 2 2 2 2 2 2 3 3 2 2 2 3 2 2 3 1
## 2 3 2 2 3 2 2 2 2 2 2 2 2 2 2 3 2 2 2 3
## 2 2 2 2 2 3 4 1 3 2 3 2 2 3 2 2 2 2 3 2
## 3 1 3 2 2 2 3 3 4 2 1 2 2 1 4 2 3 2 2 4
## 2 2 2 2 3 2 2 2 4 2 2 2 2 2 2 2 1 2 3 2
## 2 2 1 2 3 1 2 2 2 2 2 2 2 2 2 3 2 2 3 2
## 3 2 2 1 2 2 3 2 2 2 2 2 4 2 2 2 2 2 2 2
## Variable Usage, last iteration (var:count):
## (1: 75) (2: 77) (3: 93)
## DONE BART 11-2-2014
yest=cbind(bartFit$yhat.train,bartFit$yhat.test)
factual=which(d.p==1)
cfactual=length(y.p)+which(d.p==0) # = 93+..., avoiding the hard-coded sample size
post.effect.marg=apply(yest,1,function(x) mean(x[factual])-mean(x[cfactual]))
hist(post.effect.marg,nclass=50,prob=TRUE,main="(b) Posterior Effect on \n Production Value",xlab="log(y2003/y2000)",ylab="Density")
mean(post.effect.marg>0)
## [1] 0.8402
mean(post.effect.marg)
## [1] 0.3466506
qe=quantile(post.effect.marg,p=c(0.025,0.975))
qe
## 2.5% 97.5%
## -0.3371866 1.0448165
abline(v=qe,lty=2)
quantile(post.effect.marg,p=c(0.005,0.995))
## 0.5% 99.5%
## -0.5636381 1.2728543
exp(quantile(post.effect.marg,p=c(0.005,0.995)))
## 0.5% 99.5%
## 0.5691347 3.5710307
quantile(post.effect.marg,p=c(0.025,0.975))
## 2.5% 97.5%
## -0.3371866 1.0448165
exp(quantile(post.effect.marg,p=c(0.025,0.975)))
## 2.5% 97.5%
## 0.7137756 2.8428769
quantile(post.effect.marg,p=c(0.05,0.95))
## 5% 95%
## -0.2394876 0.9306671
exp(quantile(post.effect.marg,p=c(0.05,0.95)))
## 5% 95%
## 0.787031 2.536200
#dev.off()
# Causal forest analysis (grf) of the effect on employment
usable=!apply(cbind(ey3,ey0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(ey3/ey0)[usable] # Response variable: log-ratio
W=d[usable] # Effective status for available sample
p.e=p[usable] # Score for available sample
X=as.matrix(data.frame(p.e))
crf.e=causal_forest(X=X,Y=Y,W=W,
num.trees = 10000,honesty = TRUE)
ate.e=average_treatment_effect(crf.e, target.sample = "overlap") # returns (estimate, std.err)
# Causal forest analysis (grf) of the effect on production value
usable=!apply(cbind(py3,py0,z,d),1,function(x) any(is.na(x))) # usable data
Y=log(py3/py0)[usable] # Response variable: log-ratio
W=d[usable] # Effective status for available sample
p.p=p[usable] # Score for available sample
X=as.matrix(data.frame(p.p))
crf.p=causal_forest(X=X,Y=Y,W=W,
num.trees = 10000,honesty = TRUE)
ate.p=average_treatment_effect(crf.p, target.sample = "overlap")
# 99%, 95% and 90% normal-approximation intervals for the employment effect
inf=round(ate.e[1]+qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
sup=round(ate.e[1]-qnorm(c(0.005,0.025,0.05))*ate.e[2],2)
cbind(inf,sup)
## inf sup
## [1,] -0.28 0.97
## [2,] -0.13 0.82
## [3,] -0.05 0.74
# Same intervals for the production-value effect
inf=round(ate.p[1]+qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
sup=round(ate.p[1]-qnorm(c(0.005,0.025,0.05))*ate.p[2],2)
cbind(inf,sup)
## inf sup
## [1,] -0.28 0.91
## [2,] -0.13 0.76
## [3,] -0.06 0.69