bayesian_iv.R

shige — Dec 28, 2013, 2:58 PM

# Use the "bayesm" package for a simple Bayesian IV model

library(bayesm)

set.seed(66)

simIV = function(delta,beta,Sigma,n,z,w,gamma) {
  eps = matrix(rnorm(2*n),ncol=2) %*% chol(Sigma)
  x = z %*% delta + eps[,1]; y = beta*x +  eps[,2] + w%*%gamma
  list(x=as.vector(x),y=as.vector(y)) }
n = 200 ; p=1 # number of instruments
z = cbind(rep(1,n),matrix(runif(n*p),ncol=p))
w = matrix(1,n,1)
rho=.8
Sigma = matrix(c(1,rho,rho,1),ncol=2)
delta = c(1,4); beta = .5; gamma = c(1)
simiv = simIV(delta,beta,Sigma,n,z,w,gamma)

R <- 2000

Mcmc1=list();  Data1 = list()
Data1$z = z; Data1$w=w; Data1$x=simiv$x; Data1$y=simiv$y
Mcmc1$R = R
Mcmc1$keep=1
out=rivGibbs(Data=Data1,Mcmc=Mcmc1)

Starting Gibbs Sampler for Linear IV Model

 nobs=  200 ;  2  instruments;  1  included exog vars
     Note: the numbers above include intercepts if in z or w

Prior Parms: 
mean of delta 
[1] 0 0
Adelta
     [,1] [,2]
[1,] 0.01 0.00
[2,] 0.00 0.01
mean of beta/gamma
[1] 0 0
Abeta/gamma
     [,1] [,2]
[1,] 0.01 0.00
[2,] 0.00 0.01
Sigma Prior Parms
nu=  3  V=
     [,1] [,2]
[1,]    1    0
[2,]    0    1

MCMC parms: R=  2000  keep=  1

MCMC Iteration (est time to end -min) 
  100  ( 0 )
  200  ( 0 )
  300  ( 0 )
  400  ( 0 )
  500  ( 0 )
  600  ( 0 )
  700  ( 0 )
  800  ( 0 )
  900  ( 0 )
  1000  ( 0 )
  1100  ( 0 )
  1200  ( 0 )
  1300  ( 0 )
  1400  ( 0 )
  1500  ( 0 )
  1600  ( 0 )
  1700  ( 0 )
  1800  ( 0 )
  1900  ( 0 )
  2000  ( 0 )
  Total Time Elapsed:  0.02 

cat("Summary of Beta draws",fill=TRUE)
Summary of Beta draws
summary(out$betadraw,tvalues=beta)
Summary of Posterior Marginal Distributions 
Moments 
  tvalues mean std dev num se rel eff sam size
1     0.5 0.53   0.058 0.0041       9      180

Quantiles 
  tvalues 2.5%   5%  50%  95% 97.5%
1     0.5 0.41 0.43 0.54 0.62  0.63
   based on 1800 valid draws (burn-in=200) 
cat("Summary of Sigma draws",fill=TRUE)
Summary of Sigma draws
summary(out$Sigmadraw,tvalues=as.vector(Sigma[upper.tri(Sigma,diag=TRUE)]))
Posterior Means of Std Deviations and Correlation Matrix 
  Std Dev    1    2
1    0.98 1.00 0.81
2    0.99 0.81 1.00

Upper Triangle of Var-Cov Matrix 
Summary of Posterior Marginal Distributions 
Moments 
    tvalues mean std dev num se rel eff sam size
1,1     1.0 0.97   0.099 0.0027     1.3      900
1,2     0.8 0.79   0.108 0.0052     4.3      360
2,2     1.0 0.98   0.138 0.0080     6.0      300
   based on 1800 valid draws (burn-in=200) 

## plotting examples
#plot(out$betadraw)

# Replicate the same model using non-Bayesian method

library(AER)
Loading required package: car
Loading required package: lmtest
Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

Loading required package: sandwich
Loading required package: survival
Loading required package: splines

x = simiv$x
y = simiv$y
d <- data.frame(z, w, x, y)

iv <- ivreg(y ~ x | X2, data = d)
summary(iv)

Call:
ivreg(formula = y ~ x | X2, data = d)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9982 -0.5473  0.0144  0.6421  2.6707 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   1.0392     0.2003    5.19  5.2e-07 ***
x             0.5304     0.0609    8.72  1.2e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.991 on 198 degrees of freedom
Multiple R-Squared: 0.602,  Adjusted R-squared:  0.6 
Wald test:   76 on 1 and 198 DF,  p-value: 1.16e-15 

# Code it in Stan and compare the results

library(rstan)
Loading required package: Rcpp
Loading required package: inline

Attaching package: 'inline'

The following object is masked from 'package:Rcpp':

    registerPlugin

rstan (Version 2.1.0, packaged: 2013-12-27 18:21:10 UTC, GitRev: 548aa7bbbb89)

n = length(y)
z = d$X2

model <- "
data {
int<lower=0> n;
matrix[n,2] yt;
vector[n] z;
}

parameters {
real b;
real d;
real a;
real g;
real<lower=0,upper=100> sigma_t;
real<lower=0,upper=100> sigma_y;
real<lower=-1,upper=1> rho_yt;
}

transformed parameters {
cov_matrix[2] Sigma_yt;
matrix[n,2] yt_hat;

Sigma_yt[1,1] <- pow(sigma_y,2);
Sigma_yt[2,2] <- pow(sigma_t,2);
Sigma_yt[1,2] <- rho_yt*sigma_y*sigma_t;
Sigma_yt[2,1] <- Sigma_yt[1,2];


for (i in 1:n) {
  yt_hat[i,2] <- g + d*z[i];
  yt_hat[i,1] <- a + b*yt[i,2];
  }
}

model {
  for (i in 1:n)
  transpose(yt[i]) ~ multi_normal(transpose(yt_hat[i]), Sigma_yt);
}
"

d_list <- list(y1=y, y2=x)
d_matrix <- as.matrix(data.frame(d_list))
data <- list(yt=d_matrix, z=z, n=n)

fit <- stan(model_code = model, data=data, pars=c("a", "b", "d", "g", "sigma_t", "sigma_y", "rho_yt"))

TRANSLATING MODEL 'model' FROM Stan CODE TO C++ CODE NOW.
COMPILING THE C++ CODE FOR MODEL 'model' NOW.
SAMPLING FOR MODEL 'model' NOW (CHAIN 1).

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 14.02 seconds (Warm-up)
              13.47 seconds (Sampling)
              27.49 seconds (Total)

SAMPLING FOR MODEL 'model' NOW (CHAIN 2).

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 15.33 seconds (Warm-up)
              14.13 seconds (Sampling)
              29.46 seconds (Total)

SAMPLING FOR MODEL 'model' NOW (CHAIN 3).

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 14.84 seconds (Warm-up)
              13.7 seconds (Sampling)
              28.54 seconds (Total)

SAMPLING FOR MODEL 'model' NOW (CHAIN 4).

Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20%]  (Warmup)
Iteration:  600 / 2000 [ 30%]  (Warmup)
Iteration:  800 / 2000 [ 40%]  (Warmup)
Iteration: 1000 / 2000 [ 50%]  (Warmup)
Iteration: 1200 / 2000 [ 60%]  (Sampling)
Iteration: 1400 / 2000 [ 70%]  (Sampling)
Iteration: 1600 / 2000 [ 80%]  (Sampling)
Iteration: 1800 / 2000 [ 90%]  (Sampling)
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 14.02 seconds (Warm-up)
              11.8 seconds (Sampling)
              25.82 seconds (Total)

print(fit, digits=3)
Inference for Stan model: model.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

           mean se_mean    sd    2.5%     25%     50%     75%   97.5%
a         1.030   0.008 0.208   0.641   0.891   1.025   1.163   1.457
b         0.534   0.003 0.063   0.403   0.492   0.536   0.577   0.650
d         4.161   0.010 0.254   3.663   3.989   4.156   4.333   4.660
g         1.027   0.006 0.144   0.743   0.929   1.027   1.124   1.306
sigma_t   0.985   0.001 0.049   0.897   0.951   0.982   1.018   1.088
sigma_y   0.995   0.003 0.071   0.872   0.946   0.989   1.039   1.146
rho_yt    0.807   0.001 0.032   0.737   0.786   0.809   0.830   0.864
lp__    -89.137   0.052 1.868 -93.652 -90.109 -88.815 -87.791 -86.459
        n_eff  Rhat
a         606 1.003
b         629 1.003
d         664 1.005
g         664 1.004
sigma_t  1348 1.000
sigma_y   785 1.001
rho_yt    864 1.001
lp__     1294 1.000

Samples were drawn using NUTS(diag_e) at Sat Dec 28 15:01:18 2013.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at 
convergence, Rhat=1).