This example will go through the basics of using JAGS (https://sourceforge.net/projects/mcmc-jags/files/JAGS/3.x/), by way of the rjags package, for estimation of linear and generalized linear mixed models. You must install both JAGS and rjags for this to work.
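
JAGS itself is a standalone program, installed from the link above; the R-side packages then come from CRAN. A minimal sketch:

#JAGS must be installed first (standalone program, link above);
#then install the R-side packages from CRAN:
install.packages(c("rjags", "coda"))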

We will use the BRFSS data for the state of Texas for our example, with BMI as a continuous outcome and obesity status (BMI >= 30) as a dichotomous outcome.

First we load our data and recode some variables:

library(rjags)
## Loading required package: coda
## Loading required package: lattice
## Linked to JAGS 3.3.0
## Loaded modules: basemod,bugs
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(car)
load("~/Google Drive/dem7903_App_Hier/data/brfss_11.Rdata")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)
brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")
brfss_11$obese<-ifelse(brfss_11$bmi5/100 >=30, 1,0)
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=F)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=F)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=F)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=F)
#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)
brfss_11$agez<-as.numeric(scale(brfss_11$age, center=T, scale=T))
brfss_11$lowinc<-recode(brfss_11$incomg, recodes = "1:3=1; 4:5=0; else=NA")
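
Recodes like these are worth spot-checking before modeling; for example, cross-tabulating a source variable against one of its derived dummies (a quick sketch):

#quick sanity check of one recode: rows are the source codes, columns the new dummy
table(brfss_11$racegr2, brfss_11$black, useNA = "ifany")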

Next, I select the observations from Texas that are not missing on some key variables:

tx<-brfss_11[brfss_11$statefip=="48" & !is.na(brfss_11$obese) & !is.na(brfss_11$black) & !is.na(brfss_11$lths) & !is.na(brfss_11$lowinc), ]
nwncos<-table(tx$cofips)
nwncos #Number of people within counties
## 
## 48029 48133 48157 48201 48303 48329 48423 48439 48453 
##   863   498   772  1185   603   421   452   483   859
tx$conum<-rep(1:length(unique(tx$cofips)), nwncos[nwncos!=0])
length(unique(tx$conum)) #Number of counties
## [1] 9
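
As an aside, the rep() trick above assumes the rows of tx are grouped by county in sorted order (matching how table() orders its counts). A sketch of an equivalent construction that does not depend on row order:

#order-independent construction of the county index;
#factor() sorts the unique cofips values, matching table()'s ordering
tx$conum<-as.numeric(factor(tx$cofips))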

Linear Mixed Model Example

Here is a linear mixed model for BMI. The model includes a random intercept and a few predictor variables. There are lots of ways to do this, but I like doing it this way: it uses what is known as “nested indexing” in the BUGS language, which is basically a way of explicitly nesting individuals within groups (see the short sketch below). I write my model code as a big string, then feed it to JAGS.
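
To see what nested indexing does, here is a toy example: u[cofips[i]] looks up the random effect belonging to person i's county.

#toy example of nested indexing: 5 people in 3 counties
grp<-c(1, 1, 2, 3, 3)    #group (county) index for each person
u_grp<-c(0.5, -0.2, 0.1) #one random intercept per group
u_grp[grp]               #each person gets their own group's intercept
## [1]  0.5  0.5 -0.2  0.1  0.1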

model1<-"
model{

#Likelihood
  for( i in 1:n)
    {
      bmi[i]~dnorm(mu[i], tau)
      mu[i]<-b0+b[1]*black[i]+b[2]*hisp[i]+b[3]*other[i]+b[4]*lths[i]+b[5]*coll[i]+u[cofips[i]]
    }

for (j in 1:ncos)
  {
    u[j]~dnorm(0, tau_u)
  }
#priors
b0~dnorm(0, .01)
for(j in 1:5) { b[j]~dnorm(0, .01)}
tau<-pow(sd, -2)
sd~dunif(0,100)

tau_u<-pow(sd_u, -2)
sd_u~dunif(0,100)

}
"

Next, we have to make a data list for jags, which contains anything we are reading into JAGS as data. I z-score the BMI variable prior to putting it into the model. Note that dnorm() in the BUGS language is parameterized with a precision (1/variance), not a variance or standard deviation, which is why the model converts sd to tau via pow(sd, -2).

dat<-list(bmi=as.numeric(scale(tx$bmi5/100, center=T, scale=T)), obese=tx$obese, black=tx$black, hisp=tx$hispanic, other=tx$other, lths=tx$lths, coll=tx$coll, age=tx$agez, lowinc=tx$lowinc, n=length(tx$obese),cofips=tx$conum, ncos=length(unique(tx$cofips)))

#quick summary
lapply(dat, summary)
## $bmi
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.510  -0.680  -0.182   0.000   0.508   9.030 
## 
## $obese
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.283   1.000   1.000 
## 
## $black
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.0942  0.0000  1.0000 
## 
## $hisp
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.169   0.000   1.000 
## 
## $other
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.0582  0.0000  1.0000 
## 
## $lths
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0000  0.0000  0.0000  0.0922  0.0000  1.0000 
## 
## $coll
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   1.000   0.691   1.000   1.000 
## 
## $age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.5100 -0.6170  0.1620  0.0903  0.7740  2.5000 
## 
## $lowinc
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   0.000   0.376   1.000   1.000 
## 
## $n
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6140    6140    6140    6140    6140    6140 
## 
## $cofips
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    3.00    4.00    4.76    7.00    9.00 
## 
## $ncos
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       9       9       9       9       9       9

To use JAGS, we have to create a jags.model object, which contains the text representation of our model, our data, and some other parameters for the MCMC run.

init.rng1<-list(".RNG.seed" = 1234, ".RNG.name" = "base::Mersenne-Twister")
init.rng2<-list(".RNG.seed" = 5678, ".RNG.name" = "base::Mersenne-Twister")
mod<-jags.model(file=textConnection(model1), data=dat,inits =list(init.rng1, init.rng2) , n.chains=2)
## Warning: Unused variable "obese" in data
## Warning: Unused variable "age" in data
## Warning: Unused variable "lowinc" in data
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 43094
## 
## Initializing model
#next, we update the model; this is the "burn in" period
update(mod, 1000)

Next, we examine the posterior distributions of the parameters. First, we must collect samples of each parameter using the coda.samples() function.

#burn in for 10,000 more iterations
update(mod, 10000)

#sample 5,000 iterations from each chain (5,000 * 2 chains = 10,000 samples)
samps<-coda.samples(mod, variable.names=c("b0", "b", "sd", "sd_u"), n.iter=5000)

#Numerical summary of each parameter; the quantiles requested give the 90% and 95% credible intervals:
summary(samps, quantiles =  c(.025, .05, .95, .975))
## 
## Iterations = 12001:17000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 5000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##         Mean      SD Naive SE Time-series SE
## b[1]  0.4971 0.04417 4.42e-04       0.000552
## b[2]  0.1742 0.03720 3.72e-04       0.000571
## b[3] -0.1563 0.05453 5.45e-04       0.000661
## b[4]  0.1100 0.05096 5.10e-04       0.000900
## b[5] -0.0672 0.03138 3.14e-04       0.000887
## b0   -0.0277 0.03552 3.55e-04       0.001214
## sd    0.9836 0.00902 9.02e-05       0.000109
## sd_u  0.0563 0.02542 2.54e-04       0.000688
## 
## 2. Quantiles for each variable:
## 
##         2.5%      5%     95%   97.5%
## b[1]  0.4093  0.4243  0.5690  0.5845
## b[2]  0.1016  0.1128  0.2356  0.2470
## b[3] -0.2635 -0.2463 -0.0673 -0.0505
## b[4]  0.0105  0.0261  0.1936  0.2097
## b[5] -0.1271 -0.1182 -0.0148 -0.0048
## b0   -0.0975 -0.0863  0.0311  0.0424
## sd    0.9664  0.9689  0.9986  1.0014
## sd_u  0.0165  0.0217  0.1036  0.1173
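
We can also inspect the chains visually; coda's plot method produces trace plots and posterior density estimates for each monitored parameter:

#trace plots and posterior densities for each parameter
plot(samps)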

The “effective sample size” tells us how many effectively independent samples we have, out of the 5,000 samples * 2 chains (maximum = 10,000 here). If this number is low, then we have a lot of autocorrelation in the chains.

effectiveSize(samps)
##   b[1]   b[2]   b[3]   b[4]   b[5]     b0     sd   sd_u 
## 6415.1 4247.9 6832.1 3211.1 1252.8  855.3 6806.2 1397.1
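#if the effective sample size is low, coda's autocorr.diag() shows where the
#autocorrelation is, by lag and parameter (a quick diagnostic sketch):
autocorr.diag(samps)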
#Examine convergence of the Markov chains using the Gelman-Rubin diagnostic
gelman.diag(samps)
## Potential scale reduction factors:
## 
##      Point est. Upper C.I.
## b[1]          1       1.00
## b[2]          1       1.01
## b[3]          1       1.00
## b[4]          1       1.00
## b[5]          1       1.02
## b0            1       1.01
## sd            1       1.00
## sd_u          1       1.00
## 
## Multivariate psrf
## 
## 1
#here's a way to get Bayesian analogues of p-values for each beta: the
#posterior probability that each coefficient is greater (or less) than zero
sampmat<-as.matrix(samps)
str(sampmat)
##  num [1:10000, 1:8] 0.479 0.544 0.449 0.363 0.534 ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:8] "b[1]" "b[2]" "b[3]" "b[4]" ...
head(sampmat)
##        b[1]   b[2]     b[3]    b[4]     b[5]        b0     sd    sd_u
## [1,] 0.4792 0.1900 -0.10163 0.07904 -0.07368 -0.021300 0.9842 0.03113
## [2,] 0.5441 0.1926 -0.19263 0.08153 -0.08015 -0.003315 0.9716 0.03409
## [3,] 0.4490 0.1223 -0.12613 0.10205 -0.07990  0.027161 0.9737 0.03913
## [4,] 0.3627 0.1395 -0.09562 0.07516 -0.08660  0.043738 0.9665 0.09570
## [5,] 0.5344 0.1467 -0.22094 0.09766 -0.10558  0.040411 0.9697 0.07939
## [6,] 0.5515 0.1588 -0.11270 0.06226 -0.10785  0.027470 0.9958 0.07199
apply(sampmat[, 1:5], 2, function(x) mean(x > 0)) # Get p(beta > 0)
##   b[1]   b[2]   b[3]   b[4]   b[5] 
## 1.0000 1.0000 0.0021 0.9851 0.0183
apply(sampmat[, 1:5], 2, function(x) mean(x < 0)) # Get p(beta < 0)
##   b[1]   b[2]   b[3]   b[4]   b[5] 
## 0.0000 0.0000 0.9979 0.0149 0.9817

Hierarchical LMM with Random Slopes and Intercepts

Now, we expand the model to include random slopes and intercepts. We could sample the random coefficients from independent normal distributions, but it is better to sample them from a multivariate normal distribution, which lets the slopes and intercepts be correlated. This takes a little more coding: we need a prior on the precision matrix of the multivariate normal, and we must transform it back to the variance scale (vs. the precision scale) and calculate the correlation parameter.
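
As an aside, you can get a feel for the Wishart prior by simulating from it in base R. This is an illustration only, assuming JAGS's dwish(R, k) has mean k*R^-1, so that it corresponds to stats::rWishart() with scale matrix solve(R):

#one draw from the Wishart prior used below (illustration only)
Omega<-diag(2)*.1
prec.draw<-rWishart(1, df=2, Sigma=solve(Omega))[, , 1]
Sigma.draw<-solve(prec.draw) #back to the covariance (variance) scale
Sigma.draw[1,2]/sqrt(Sigma.draw[1,1]*Sigma.draw[2,2]) #implied correlation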

model2<-"
model{

#Likelihood
  for( i in 1:n)
    {
      bmi[i]~dnorm(mu[i], tau)
      mu[i]<-b[1]*black[i]+b[2]*hisp[i]+b[3]*other[i]+b[4]*lths[i]+b[5]*coll[i]+u[cofips[i], 1]*lowinc[i]+u[cofips[i], 2]
    }

for (j in 1:ncos)
  {
    u[j, 1:2] ~ dmnorm(meanu[], prec.Sigma[,])
  }

#priors
#NOTICE I removed the separate intercept, b0: the random intercepts u[j,2] now have a freely estimated mean, meanu[2], so a separate fixed intercept would not be identifiable
for(j in 1:5) { b[j]~dnorm(0, .01)}

meanu[1]~dnorm(0, .001)
meanu[2]~dnorm(0, .001)
prec.Sigma[1:2, 1:2] ~ dwish(Omega[,], 2)  #Wishart is MV form of gamma on precision, it will be square, with nrow = ncol = # of correlated random effects, so if we have another random slope, it would be prec.Sigma[1:3, 1:3] ~ dwish(Omega[,], 3)

Sigma[1:2, 1:2]<-inverse(prec.Sigma[,]) #get the covariance matrix on the variance scale
rho12<-Sigma[1,2]/ sqrt(Sigma[1,1]* Sigma[2,2])

#Define the scale matrix for the Wishart prior; equals(j,k) makes it diagonal with .1 on the diagonal
for (j in 1:2){ for (k in 1:2){  Omega[j,k] <-equals(j,k)*.1 } }

tau<-pow(sd, -2)
sd~dunif(0,100)

}
"

mod2<-jags.model(file=textConnection(model2), data=dat, n.chains=2,inits =list(init.rng1, init.rng2) )
## Warning: Unused variable "obese" in data
## Warning: Unused variable "age" in data
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 49371
## 
## Initializing model
update(mod2, 10000)

#collect 5,000 samples per chain of the parameters
samps2<-coda.samples(mod2, variable.names=c( "b", "Sigma", "rho12", "sd", "u"), n.iter=5000)
effectiveSize(samps2)
## Sigma[1,1] Sigma[2,1] Sigma[1,2] Sigma[2,2]       b[1]       b[2] 
##       6387       6399       6399       7388       6136       4176 
##       b[3]       b[4]       b[5]      rho12         sd     u[1,1] 
##       7031       3581       1001       6305       6574       6802 
##     u[2,1]     u[3,1]     u[4,1]     u[5,1]     u[6,1]     u[7,1] 
##       6847       8145       7963       7451       8079       7644 
##     u[8,1]     u[9,1]     u[1,2]     u[2,2]     u[3,2]     u[4,2] 
##       7699       6994       2137       4106       1792       1833 
##     u[5,2]     u[6,2]     u[7,2]     u[8,2]     u[9,2] 
##       2587       3335       3275       2532       1800
#Numerical summary of each parameter:
summary(samps2, quantiles =  c(.025, .05, .95, .975))
## 
## Iterations = 11001:16000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 5000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                Mean      SD Naive SE Time-series SE
## Sigma[1,1]  0.02671 0.01774 1.77e-04       0.000222
## Sigma[2,1] -0.00717 0.01149 1.15e-04       0.000144
## Sigma[1,2] -0.00717 0.01149 1.15e-04       0.000144
## Sigma[2,2]  0.02262 0.01421 1.42e-04       0.000165
## b[1]        0.47758 0.04510 4.51e-04       0.000577
## b[2]        0.15330 0.03791 3.79e-04       0.000587
## b[3]       -0.15309 0.05513 5.51e-04       0.000658
## b[4]        0.09529 0.05100 5.10e-04       0.000857
## b[5]       -0.04030 0.03242 3.24e-04       0.001024
## rho12      -0.26456 0.31746 3.17e-03       0.003997
## sd          0.98244 0.00879 8.79e-05       0.000108
## u[1,1]      0.10737 0.06319 6.32e-04       0.000773
## u[2,1]     -0.08537 0.08016 8.02e-04       0.000969
## u[3,1]      0.06973 0.07953 7.95e-04       0.000881
## u[4,1]      0.10778 0.05619 5.62e-04       0.000630
## u[5,1]      0.09490 0.07219 7.22e-04       0.000838
## u[6,1]      0.04877 0.08627 8.63e-04       0.000960
## u[7,1]      0.14258 0.07959 7.96e-04       0.000910
## u[8,1]     -0.00270 0.08057 8.06e-04       0.000918
## u[9,1]      0.15818 0.06879 6.88e-04       0.000824
## u[1,2]     -0.02597 0.05075 5.07e-04       0.001101
## u[2,2]      0.04501 0.06715 6.72e-04       0.001050
## u[3,2]     -0.13925 0.04847 4.85e-04       0.001149
## u[4,2]     -0.08121 0.04542 4.54e-04       0.001062
## u[5,2]     -0.03382 0.05494 5.49e-04       0.001080
## u[6,2]     -0.02231 0.05898 5.90e-04       0.001021
## u[7,2]     -0.14203 0.06242 6.24e-04       0.001093
## u[8,2]      0.00133 0.05933 5.93e-04       0.001186
## u[9,2]     -0.16325 0.04857 4.86e-04       0.001147
## 
## 2. Quantiles for each variable:
## 
##                2.5%       5%      95%    97.5%
## Sigma[1,1]  0.00878  0.01002  0.05852  0.07237
## Sigma[2,1] -0.03473 -0.02679  0.00656  0.01001
## Sigma[1,2] -0.03473 -0.02679  0.00656  0.01001
## Sigma[2,2]  0.00790  0.00893  0.04843  0.05942
## b[1]        0.38936  0.40259  0.55108  0.56424
## b[2]        0.07910  0.09077  0.21501  0.22830
## b[3]       -0.26073 -0.24253 -0.06050 -0.04654
## b[4]       -0.00421  0.01364  0.18050  0.19578
## b[5]       -0.10328 -0.09313  0.01272  0.02270
## rho12      -0.77977 -0.72819  0.30988  0.42641
## sd          0.96529  0.96819  0.99707  0.99993
## u[1,1]     -0.01653  0.00400  0.21255  0.23246
## u[2,1]     -0.24647 -0.21895  0.04394  0.06582
## u[3,1]     -0.08645 -0.06175  0.19881  0.22783
## u[4,1]     -0.00109  0.01571  0.19976  0.21920
## u[5,1]     -0.04635 -0.02412  0.21328  0.23618
## u[6,1]     -0.11938 -0.09208  0.19095  0.21932
## u[7,1]     -0.01155  0.01238  0.27411  0.30150
## u[8,1]     -0.16260 -0.13561  0.12774  0.15185
## u[9,1]      0.02428  0.04469  0.27291  0.29363
## u[1,2]     -0.12764 -0.10931  0.05870  0.07350
## u[2,2]     -0.08499 -0.06414  0.15744  0.17856
## u[3,2]     -0.23329 -0.21833 -0.05993 -0.04389
## u[4,2]     -0.17033 -0.15719 -0.00698  0.00755
## u[5,2]     -0.14097 -0.12505  0.05665  0.07226
## u[6,2]     -0.13598 -0.11827  0.07603  0.09674
## u[7,2]     -0.26444 -0.24454 -0.04010 -0.02033
## u[8,2]     -0.11405 -0.09544  0.09949  0.12065
## u[9,2]     -0.25653 -0.24242 -0.08274 -0.06650
#Examine convergence of the Markov chains using the Gelman-Rubin diagnostic
gelman.diag(samps2, multivariate = F)
## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## Sigma[1,1]          1       1.01
## Sigma[2,1]          1       1.01
## Sigma[1,2]          1       1.01
## Sigma[2,2]          1       1.00
## b[1]                1       1.00
## b[2]                1       1.00
## b[3]                1       1.00
## b[4]                1       1.00
## b[5]                1       1.00
## rho12               1       1.00
## sd                  1       1.00
## u[1,1]              1       1.00
## u[2,1]              1       1.00
## u[3,1]              1       1.00
## u[4,1]              1       1.00
## u[5,1]              1       1.00
## u[6,1]              1       1.00
## u[7,1]              1       1.00
## u[8,1]              1       1.00
## u[9,1]              1       1.00
## u[1,2]              1       1.00
## u[2,2]              1       1.00
## u[3,2]              1       1.00
## u[4,2]              1       1.00
## u[5,2]              1       1.00
## u[6,2]              1       1.00
## u[7,2]              1       1.00
## u[8,2]              1       1.00
## u[9,2]              1       1.00

Comparing Models Using the Deviance Information Criterion

In Bayesian models, there isn’t a direct equivalent of a likelihood ratio test, so traditionally people have used the Deviance Information Criterion (DIC) as a measure of relative model fit (Spiegelhalter et al., 2002). You can think of this as a Bayesian analogue of the AIC, if you’re used to working with that. It’s a measure of model fit (deviance), penalized for complexity (the effective number of parameters, pD).

dic1<-dic.samples(mod, n.iter = 1000, type = "pD")
dic2<-dic.samples(mod2, n.iter = 1000, type = "pD")
dic1
## Mean deviance:  17208 
## penalty 12.4 
## Penalized deviance: 17221
dic2
## Mean deviance:  17195 
## penalty 21.6 
## Penalized deviance: 17217
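
rjags also provides a diffdic() function, which computes the difference between two penalized deviances directly, along with a sample standard error for the difference:

#directly compare the two models' penalized deviances
diffdic(dic1, dic2)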

We see the penalized deviance of model 2 (the random slopes + intercepts model) is only slightly lower than that of the model with only random intercepts. While there is no hard and fast rule for how big the difference has to be, in their original paper, Spiegelhalter et al. suggest that differences in DIC greater than 7 indicate that one model is preferred over another; by that standard, these two models fit about equally well.

Hierarchical GLMM (Logistic Regression Model)

Here, we fit the hierarchical logistic regression model for obesity status, again using the random slopes and intercepts specification from the previous model.

model3<-"
model{

#Likelihood
  for( i in 1:n)
    {
      obese[i]~dbern(p[i])
      logit(p[i])<-b[1]*black[i]+b[2]*hisp[i]+b[3]*other[i]+b[4]*lths[i]+b[5]*coll[i]+u[cofips[i], 1]*lowinc[i]+u[cofips[i], 2]
    }

for (j in 1:ncos)
  {
    u[j, 1:2] ~ dmnorm(meanu[], prec.Sigma[,])
  }

#priors
#NOTICE I removed the separate intercept, b0: the random intercepts u[j,2] now have a freely estimated mean, meanu[2], so a separate fixed intercept would not be identifiable
for(j in 1:5) { b[j]~dnorm(0, .01)}

meanu[1]~dnorm(0, .001)
meanu[2]~dnorm(0, .001)
prec.Sigma[1:2, 1:2] ~ dwish(Omega[,], 2)  #Wishart is MV form of gamma on precision, it will be square, with nrow = ncol = # of correlated random effects, so if we have another random slope, it would be prec.Sigma[1:3, 1:3] ~ dwish(Omega[,], 3)

Sigma[1:2, 1:2]<-inverse(prec.Sigma[,]) #get the covariance matrix on the variance scale
rho12<-Sigma[1,2]/ sqrt(Sigma[1,1]* Sigma[2,2])

#Define the scale matrix for the Wishart prior; equals(j,k) makes it diagonal with .1 on the diagonal
for (j in 1:2){ for (k in 1:2){  Omega[j,k] <-equals(j,k)*.1 } }

}
"

And now we fit the model:

load.module("glm")
## module glm loaded
mod3<-jags.model(file=textConnection(model3), data=dat, n.chains=2, inits =list(init.rng1, init.rng2) )
## Warning: Unused variable "bmi" in data
## Warning: Unused variable "age" in data
## Compiling model graph
##    Resolving undeclared variables
##    Allocating nodes
##    Graph Size: 49560
## 
## Initializing model
#GLMM updating can take longer than LMMs, so make yourself a drink
update(mod3, 10000)

#collect 5,000 samples per chain of the parameters
#note: the logistic model has no sd node, so we do not monitor one here
samps3<-coda.samples(mod3, variable.names=c( "b", "Sigma", "rho12", "u"), n.iter=5000)
effectiveSize(samps3)
## Sigma[1,1] Sigma[2,1] Sigma[1,2] Sigma[2,2]       b[1]       b[2] 
##       3509       3268       3268       3840       6289       5837 
##       b[3]       b[4]       b[5]      rho12     u[1,1]     u[2,1] 
##       4871       5209       2769       2827       4739       2897 
##     u[3,1]     u[4,1]     u[5,1]     u[6,1]     u[7,1]     u[8,1] 
##       4503       4435       4486       4438       3912       5087 
##     u[9,1]     u[1,2]     u[2,2]     u[3,2]     u[4,2]     u[5,2] 
##       3861       3200       2721       3317       3208       3209 
##     u[6,2]     u[7,2]     u[8,2]     u[9,2] 
##       3423       2810       3636       2889
#Numerical summary of each parameter:
summary(samps3, quantiles =  c(.025, .05, .95, .975))
## 
## Iterations = 10001:15000
## Thinning interval = 1 
## Number of chains = 2 
## Sample size per chain = 5000 
## 
## 1. Empirical mean and standard deviation for each variable,
##    plus standard error of the mean:
## 
##                Mean     SD Naive SE Time-series SE
## Sigma[1,1]  0.05092 0.0420 0.000420       0.000710
## Sigma[2,1] -0.02450 0.0318 0.000318       0.000556
## Sigma[1,2] -0.02450 0.0318 0.000318       0.000556
## Sigma[2,2]  0.04893 0.0361 0.000361       0.000586
## b[1]        0.78988 0.0950 0.000950       0.001197
## b[2]        0.25223 0.0829 0.000829       0.001085
## b[3]       -0.24772 0.1384 0.001384       0.001983
## b[4]        0.15362 0.1090 0.001090       0.001515
## b[5]       -0.12526 0.0735 0.000735       0.001418
## rho12      -0.42170 0.3314 0.003314       0.006272
## u[1,1]      0.29198 0.1251 0.001251       0.001816
## u[2,1]     -0.00202 0.1714 0.001714       0.003193
## u[3,1]      0.23332 0.1525 0.001525       0.002273
## u[4,1]      0.22194 0.1131 0.001131       0.001712
## u[5,1]      0.17300 0.1409 0.001409       0.002104
## u[6,1]      0.26902 0.1594 0.001594       0.002399
## u[7,1]      0.37123 0.1633 0.001633       0.002613
## u[8,1]      0.24381 0.1492 0.001492       0.002091
## u[9,1]      0.36945 0.1439 0.001439       0.002317
## u[1,2]     -0.95162 0.1111 0.001111       0.001982
## u[2,2]     -0.81915 0.1485 0.001485       0.002902
## u[3,2]     -1.18337 0.1098 0.001098       0.001906
## u[4,2]     -1.07138 0.1030 0.001030       0.001829
## u[5,2]     -1.01012 0.1202 0.001202       0.002133
## u[6,2]     -1.02069 0.1265 0.001265       0.002201
## u[7,2]     -1.20257 0.1410 0.001410       0.002662
## u[8,2]     -1.06812 0.1262 0.001262       0.002094
## u[9,2]     -1.28017 0.1148 0.001148       0.002135
## 
## 2. Quantiles for each variable:
## 
##                 2.5%        5%      95%   97.5%
## Sigma[1,1]  0.011976  0.014230  0.12589  0.1582
## Sigma[2,1] -0.106460 -0.080552  0.00616  0.0111
## Sigma[1,2] -0.106460 -0.080552  0.00616  0.0111
## Sigma[2,2]  0.013283  0.015542  0.11453  0.1429
## b[1]        0.602134  0.632823  0.94534  0.9748
## b[2]        0.092640  0.117423  0.38913  0.4151
## b[3]       -0.524596 -0.476269 -0.02158  0.0188
## b[4]       -0.062017 -0.025978  0.33193  0.3666
## b[5]       -0.268153 -0.244717 -0.00433  0.0202
## rho12      -0.884572 -0.851601  0.21651  0.3447
## u[1,1]      0.046049  0.086269  0.49796  0.5387
## u[2,1]     -0.365504 -0.299176  0.26323  0.3091
## u[3,1]     -0.075701 -0.020083  0.48049  0.5271
## u[4,1]     -0.000197  0.035877  0.40814  0.4471
## u[5,1]     -0.109276 -0.061293  0.40244  0.4464
## u[6,1]     -0.041505  0.008495  0.53481  0.5878
## u[7,1]      0.071796  0.113991  0.65269  0.7171
## u[8,1]     -0.048158 -0.000546  0.49018  0.5404
## u[9,1]      0.099448  0.140274  0.61445  0.6654
## u[1,2]     -1.170413 -1.132977 -0.76920 -0.7337
## u[2,2]     -1.098305 -1.056944 -0.56741 -0.5206
## u[3,2]     -1.401253 -1.365394 -1.00308 -0.9722
## u[4,2]     -1.275440 -1.244081 -0.90193 -0.8707
## u[5,2]     -1.240625 -1.206127 -0.81338 -0.7729
## u[6,2]     -1.273956 -1.229580 -0.81422 -0.7772
## u[7,2]     -1.490931 -1.441643 -0.97886 -0.9395
## u[8,2]     -1.316369 -1.276431 -0.86387 -0.8250
## u[9,2]     -1.512763 -1.473872 -1.09469 -1.0640
#Examine convergence of the Markov chains using the Gelman-Rubin diagnostic
gelman.diag(samps3, multivariate = F)
## Potential scale reduction factors:
## 
##            Point est. Upper C.I.
## Sigma[1,1]          1       1.00
## Sigma[2,1]          1       1.00
## Sigma[1,2]          1       1.00
## Sigma[2,2]          1       1.00
## b[1]                1       1.01
## b[2]                1       1.00
## b[3]                1       1.00
## b[4]                1       1.00
## b[5]                1       1.00
## rho12               1       1.00
## u[1,1]              1       1.00
## u[2,1]              1       1.00
## u[3,1]              1       1.00
## u[4,1]              1       1.00
## u[5,1]              1       1.00
## u[6,1]              1       1.00
## u[7,1]              1       1.00
## u[8,1]              1       1.00
## u[9,1]              1       1.00
## u[1,2]              1       1.00
## u[2,2]              1       1.00
## u[3,2]              1       1.00
## u[4,2]              1       1.00
## u[5,2]              1       1.00
## u[6,2]              1       1.00
## u[7,2]              1       1.00
## u[8,2]              1       1.00
## u[9,2]              1       1.00
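
Since the logistic model’s coefficients are on the log-odds scale, it is often easier to interpret them as odds ratios. A short sketch using the posterior draws:

#convert posterior draws of the fixed effects to odds ratios
bmat3<-as.matrix(samps3)[, paste0("b[", 1:5, "]")]
or.summ<-t(apply(exp(bmat3), 2, quantile, probs=c(.5, .025, .975)))
round(or.summ, 3) #posterior median and 95% interval for each odds ratio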

The model appears to have converged, and the other diagnostics all look good. The DIC is below; note that it is not comparable to the DICs from the linear models, since the outcome and likelihood are different:

dic.samples(mod3, n.iter=1000)
## Mean deviance:  7145 
## penalty 17.8 
## Penalized deviance: 7163