shige — Dec 28, 2013, 2:58 PM
# Use the "bayesm" package for a simple Bayesian IV model
library(bayesm)
set.seed(66)
simIV = function(delta, beta, Sigma, n, z, w, gamma) {
  # correlated errors: column 1 hits the first stage, column 2 the outcome
  eps = matrix(rnorm(2*n), ncol = 2) %*% chol(Sigma)
  x = z %*% delta + eps[,1]            # first stage: x driven by instruments z
  y = beta*x + eps[,2] + w %*% gamma   # outcome: x endogenous via the correlated errors
  list(x = as.vector(x), y = as.vector(y))
}
n = 200; p = 1   # sample size; p = number of instruments
z = cbind(rep(1,n), matrix(runif(n*p), ncol = p))  # instrument matrix: intercept plus p instruments
w = matrix(1, n, 1)                                # included exogenous variable (intercept only)
rho = .8                                           # error correlation: this is what makes x endogenous
Sigma = matrix(c(1, rho, rho, 1), ncol = 2)
delta = c(1,4); beta = .5; gamma = c(1)            # true parameter values to recover
simiv = simIV(delta,beta,Sigma,n,z,w,gamma)
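# Because rho = .8 ties the first-stage and outcome errors together, x is
# endogenous, so plain OLS on the simulated data should overshoot the true
# beta = .5. A quick sanity check, not part of the original session:
coef(lm(simiv$y ~ simiv$x))   # slope should land well above the true beta = .5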
R <- 2000
Mcmc1 = list(); Data1 = list()
Data1$z = z; Data1$w = w; Data1$x = simiv$x; Data1$y = simiv$y
Mcmc1$R = R        # number of MCMC draws
Mcmc1$keep = 1     # keep every draw
out = rivGibbs(Data = Data1, Mcmc = Mcmc1)
Starting Gibbs Sampler for Linear IV Model
nobs= 200 ; 2 instruments; 1 included exog vars
Note: the numbers above include intercepts if in z or w
Prior Parms:
mean of delta
[1] 0 0
Adelta
[,1] [,2]
[1,] 0.01 0.00
[2,] 0.00 0.01
mean of beta/gamma
[1] 0 0
Abeta/gamma
[,1] [,2]
[1,] 0.01 0.00
[2,] 0.00 0.01
Sigma Prior Parms
nu= 3 V=
[,1] [,2]
[1,] 1 0
[2,] 0 1
MCMC parms: R= 2000 keep= 1
MCMC Iteration (est time to end -min)
100 ( 0 )
...
2000 ( 0 )
Total Time Elapsed: 0.02
cat("Summary of Beta draws",fill=TRUE)
Summary of Beta draws
summary(out$betadraw,tvalues=beta)
Summary of Posterior Marginal Distributions
Moments
tvalues mean std dev num se rel eff sam size
1 0.5 0.53 0.058 0.0041 9 180
Quantiles
tvalues 2.5% 5% 50% 95% 97.5%
1 0.5 0.41 0.43 0.54 0.62 0.63
based on 1800 valid draws (burn-in=200)
cat("Summary of Sigma draws",fill=TRUE)
Summary of Sigma draws
summary(out$Sigmadraw,tvalues=as.vector(Sigma[upper.tri(Sigma,diag=TRUE)]))
Posterior Means of Std Deviations and Correlation Matrix
Std Dev 1 2
1 0.98 1.00 0.81
2 0.99 0.81 1.00
Upper Triangle of Var-Cov Matrix
Summary of Posterior Marginal Distributions
Moments
tvalues mean std dev num se rel eff sam size
1,1 1.0 0.97 0.099 0.0027 1.3 900
1,2 0.8 0.79 0.108 0.0052 4.3 360
2,2 1.0 0.98 0.138 0.0080 6.0 300
based on 1800 valid draws (burn-in=200)
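# The reported moments can be reproduced by hand from the draw objects. A
# sketch, not run in the original session, dropping the same 200-draw burn-in
# that summary() uses (and assuming, per the bayesm docs, that out$Sigmadraw
# stores the draws of vec(Sigma) row by row):
burn <- 200
mean(out$betadraw[-(1:burn)])        # should match the posterior mean 0.53 above
colMeans(out$Sigmadraw[-(1:burn),])  # columns are the elements of vec(Sigma)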
## plotting examples
#plot(out$betadraw)
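# For trace and density plots beyond bayesm's built-in plot method, the draws
# can also be handed to coda (an extra dependency, not loaded in this session):
#library(coda)
#beta_mcmc <- mcmc(out$betadraw[-(1:200)])  # post-burn-in draws as an mcmc object
#plot(beta_mcmc)                            # trace plot plus density
#effectiveSize(beta_mcmc)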
# Replicate the same model using non-Bayesian method
library(AER)
Loading required package: car
Loading required package: lmtest
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
Loading required package: sandwich
Loading required package: survival
Loading required package: splines
x = simiv$x
y = simiv$y
d <- data.frame(z, w, x, y)        # unnamed columns of z become X1 (intercept) and X2 (instrument)
iv <- ivreg(y ~ x | X2, data = d)  # instrument the endogenous x with X2
summary(iv)
Call:
ivreg(formula = y ~ x | X2, data = d)
Residuals:
Min 1Q Median 3Q Max
-2.9982 -0.5473 0.0144 0.6421 2.6707
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.0392 0.2003 5.19 5.2e-07 ***
x 0.5304 0.0609 8.72 1.2e-15 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.991 on 198 degrees of freedom
Multiple R-Squared: 0.602, Adjusted R-squared: 0.6
Wald test: 76 on 1 and 198 DF, p-value: 1.16e-15
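# The ivreg point estimate can be reproduced with two explicit OLS stages,
# which shows what 2SLS is doing under the hood. A sketch, not part of the
# original session; note the second-stage lm() standard errors are not the
# correct IV ones, which is exactly why ivreg exists:
stage1 <- lm(x ~ X2, data = d)      # first stage: regress endogenous x on the instrument
d$x_hat <- fitted(stage1)
stage2 <- lm(y ~ x_hat, data = d)   # second stage: regress y on the fitted values
coef(stage2)                        # the slope equals coef(iv)["x"] = 0.5304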
# Code it in Stan and compare the results
library(rstan)
Loading required package: Rcpp
Loading required package: inline
Attaching package: 'inline'
The following object is masked from 'package:Rcpp':
registerPlugin
rstan (Version 2.1.0, packaged: 2013-12-27 18:21:10 UTC, GitRev: 548aa7bbbb89)
n = length(y)
z = d$X2   # overwrite the earlier z matrix with just the instrument column
model <- "
data {
  int<lower=0> n;
  matrix[n,2] yt;                  // column 1 = outcome y, column 2 = endogenous x
  vector[n] z;                     // instrument
}
parameters {
  real b;                          // structural effect of x on y (true value .5)
  real d;                          // first-stage coefficient on z
  real a;                          // outcome intercept
  real g;                          // first-stage intercept
  real<lower=0,upper=100> sigma_t;
  real<lower=0,upper=100> sigma_y;
  real<lower=-1,upper=1> rho_yt;   // error correlation (true value .8)
}
transformed parameters {
  cov_matrix[2] Sigma_yt;
  matrix[n,2] yt_hat;
  Sigma_yt[1,1] <- pow(sigma_y,2);
  Sigma_yt[2,2] <- pow(sigma_t,2);
  Sigma_yt[1,2] <- rho_yt*sigma_y*sigma_t;
  Sigma_yt[2,1] <- Sigma_yt[1,2];
  for (i in 1:n) {
    yt_hat[i,2] <- g + d*z[i];     // first stage
    yt_hat[i,1] <- a + b*yt[i,2];  // outcome equation
  }
}
model {
  // joint bivariate-normal likelihood accounts for the endogeneity
  for (i in 1:n)
    transpose(yt[i]) ~ multi_normal(transpose(yt_hat[i]), Sigma_yt);
}
"
d_list <- list(y1 = y, y2 = x)   # column 1 = outcome, column 2 = endogenous x, matching the Stan code
d_matrix <- as.matrix(data.frame(d_list))
data <- list(yt = d_matrix, z = z, n = n)
fit <- stan(model_code = model, data = data,
            pars = c("a", "b", "d", "g", "sigma_t", "sigma_y", "rho_yt"))
TRANSLATING MODEL 'model' FROM Stan CODE TO C++ CODE NOW.
COMPILING THE C++ CODE FOR MODEL 'model' NOW.
SAMPLING FOR MODEL 'model' NOW (CHAIN 1).
Iteration:    1 / 2000 [  0%]  (Warmup)
...
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 14.02 seconds (Warm-up)
              13.47 seconds (Sampling)
              27.49 seconds (Total)
SAMPLING FOR MODEL 'model' NOW (CHAIN 2).
Iteration:    1 / 2000 [  0%]  (Warmup)
...
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 15.33 seconds (Warm-up)
              14.13 seconds (Sampling)
              29.46 seconds (Total)
SAMPLING FOR MODEL 'model' NOW (CHAIN 3).
Iteration:    1 / 2000 [  0%]  (Warmup)
...
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 14.84 seconds (Warm-up)
              13.7 seconds (Sampling)
              28.54 seconds (Total)
SAMPLING FOR MODEL 'model' NOW (CHAIN 4).
Iteration:    1 / 2000 [  0%]  (Warmup)
...
Iteration: 2000 / 2000 [100%]  (Sampling)
Elapsed Time: 14.02 seconds (Warm-up)
              11.8 seconds (Sampling)
              25.82 seconds (Total)
print(fit, digits=3)
Inference for Stan model: model.
4 chains, each with iter=2000; warmup=1000; thin=1;
post-warmup draws per chain=1000, total post-warmup draws=4000.
           mean se_mean    sd    2.5%     25%     50%     75%   97.5% n_eff  Rhat
a         1.030   0.008 0.208   0.641   0.891   1.025   1.163   1.457   606 1.003
b         0.534   0.003 0.063   0.403   0.492   0.536   0.577   0.650   629 1.003
d         4.161   0.010 0.254   3.663   3.989   4.156   4.333   4.660   664 1.005
g         1.027   0.006 0.144   0.743   0.929   1.027   1.124   1.306   664 1.004
sigma_t   0.985   0.001 0.049   0.897   0.951   0.982   1.018   1.088  1348 1.000
sigma_y   0.995   0.003 0.071   0.872   0.946   0.989   1.039   1.146   785 1.001
rho_yt    0.807   0.001 0.032   0.737   0.786   0.809   0.830   0.864   864 1.001
lp__    -89.137   0.052 1.868 -93.652 -90.109 -88.815 -87.791 -86.459  1294 1.000
Samples were drawn using NUTS(diag_e) at Sat Dec 28 15:01:18 2013.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
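# All three approaches land on essentially the same structural slope. A quick
# side-by-side, not in the original session, using rstan's extract() to pull
# the post-warmup draws:
c(bayesm = mean(out$betadraw[-(1:200)]),  # rivGibbs posterior mean, ~0.53
  tsls   = unname(coef(iv)["x"]),         # AER::ivreg estimate, 0.5304
  stan   = mean(extract(fit)$b))          # Stan posterior mean, 0.534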