The Integrated Nested Laplace Approximation, or INLA, approach is a recently developed, computationally simpler method for fitting Bayesian models [(Rue et al., 2009, compared to traditional Markov Chain Monte Carlo (MCMC) approaches. INLA fits models that are classified as latent Gaussian models, which are applicable in many settings (Martino & Rue, 2010. In general, INLA fits a general form of additive models such as:
\(\eta = \alpha + \sum_{j=1}^{nf} f^{(j)}(u_{ij}) + \sum_{k=1}^{n\beta}\beta_k z_{ki} + \epsilon_i\)
where \(\eta\) is the linear predictor for a generalized linear model formula, and is composed of a linear function of some variables u, \(\beta\) are the effects of covariates, z, and \(\epsilon\) is an unstructured residual (Rue et al., 2009). As this model is often parameterized as a Bayesian one, we are interested in the posterior marginal distributions of all the model parameters. Rue and Martino (2007) show that the posterior marginal for the random effects (x) in such models can be approximated as:
\(\tilde{p}(x_i|y) = \sum_k \tilde{p}(x_i|\theta_k, y) \tilde{p}(\theta_k|y) \Delta_k\)
via numerical integration (Rue & Martino, 2007; Schrodle & Held, 2011a, 2011b). The posterior distribution of the hyperparameters (\(\theta\)) of the model can also be approximated as:
\(\tilde{p}(\theta | y)) \propto \frac{p(x, \theta, y)}{\tilde{p}G(x| \theta,y)} \mid _{x} = x^*(\theta)\)
, where G is a Gaussian approximation of the posterior and \(x^*(\theta)\) is the mode of the conditional distribution of \(p(x|\theta,y)\). Thus, instead of using MCMC to find an iterative, sampling-based estimate of the posterior, it is arrived at numerically. This method of fitting the spatial models specified above has been presented by numerous authors (Blangiardo & Cameletti, 2015; Blangiardo et al., 2013; Lindgren & Rue, 2015; Martins et al., 2013; Schrodle & Held, 2011a, 2011b), with comparable results to MCMC.
Above, we saw how to use INLA to fit a Bayesian regression model to areal data (US Counties). This example will focus on how to use INLA to fit a Bayesian multi-level model, where our outcome is observed at the individual level, and we may or may not have information avaialble at a higher level of aggregation.
In this example, we use data from the Integrated Health Interview Series to fit an Age-Period-Cohort model. Discussion of these models can be found in the recent text on the subject. In addition to the APC model, hierarchical models could consider county or city of residence, or some other geography.
Here, we load the IHIS data and do some recodes.
library(readstata13)
library(car)
library(INLA)
## Loading required package: sp
## Loading required package: Matrix
## Loading required package: splines
dat<- read.dta13("~/Google Drive/dem7903_App_Hier/data/ihis_00008.dta", convert.factors = F)
names(dat)
## [1] "year" "serial" "strata" "psu"
## [5] "nhishid" "hhweight" "nhispid" "hhx"
## [9] "fmx" "px" "perweight" "sampweight"
## [13] "fweight" "astatflg" "cstatflg" "pernum"
## [17] "age" "sex" "marstat" "marst"
## [21] "birthyr" "racea" "hispeth" "yrsinus"
## [25] "hispyn" "usborn" "citizen" "racenew"
## [29] "racebr" "educrec2" "educrec1" "educ"
## [33] "health" "bmicalc" "alcstat1" "smokestatus2"
## [37] "mod10fwk" "vig10fwk" "nhisiid"
names(dat)<-tolower(names(dat))
dat<-dat[dat$age>=18,]
dat$goodhealth<-recode(dat$health, recodes="1:3=0; 4:5=1; else=NA")
dat$bmi_clean<-ifelse(dat$bmicalc%in%c(0,996),NA, dat$bmicalc)
dat$obese<-ifelse(dat$bmi_clean>=30,1,0)
dat$hisp<-ifelse(dat$hispeth==10, 0, ifelse(dat$hispeth %in%c(90:93), NA, 1))
dat$race<-recode(dat$racea, recodes="100=1; 200=2; 300:340=3; 400:434=4; 500:600=5; else=NA")
dat$race_eth<-factor(ifelse(dat$hisp==1, "Hispanic",
ifelse(dat$hisp==0&dat$race==1, "NHWhite",
ifelse(dat$hisp==0&dat$race==2, "NHBlack",
ifelse(dat$hisp==0&dat$race==3, "NHNAHN",
ifelse(dat$hisp==0&dat$race==4, "NHAsian",
ifelse(dat$hisp==0&dat$race==5, "NHAsian", NA)))))))
dat$educ<-recode(dat$educrec2, recodes="10:41='lths'; 42='hs';50:53='somecoll'; 54:60='colledu'; else=NA", as.factor.result=T, levels = c("lths", "hs", "somecoll", "colledu"))
dat$male<-recode(dat$sex, recodes="1=1;2=0")
dat$agec<-cut(dat$age, breaks =seq(15, 100, 5))
dat$currsmoke<-recode(dat$smokestatus2, recodes="0=NA;90=NA; 10:13=1; else=0", as.factor.result = F)
dat$currdrink<-recode(dat$alcstat1, recodes="0='never'; 1:2='former'; 3='current'; else=NA", as.factor.result = T)
dat$birth_year<-ifelse(dat$birthyr>=9997, NA, dat$birthyr)
dat$age2<-dat$age^2
dat<-subset(dat, complete.cases(goodhealth,bmi_clean, educ, currsmoke, currdrink, birth_year))
dat$bmiz<- scale(dat$bmi_clean)
dat$cohort<- cut(dat$birth_year, breaks=seq(1915, 2000, 5))
set.seed(1115)
samps<-sample(1:nrow(dat), size = 75000, replace = F) #take a sample of subjects, because the NHIS is huge.
dat<- dat[samps,]
Below we fit the model using INLA. The APC model basically has three random effects, one for Age, one for period (year of survey) and a third random effect for cohort (birth year). If we have a continuous outcome, our model structure would be :
\(y_{ijkl} = \mu_{jkl} +\sum_c \beta_c x_{ik} + e_{ij}\) with \(\mu_{jkl} = \mu + u_j + v_k + \tau_l\)
\(u_j \sim N(0, \sigma^2_u)\)
\(v_k \sim N(0, \sigma^2_v)\)
\(\tau_l \sim N(0, \sigma^2_\tau)\)
library(lme4)
#user lmer to find good starting values for INLA
fit_in1.mer0<- lmer(bmiz~ race_eth+educ+male+currsmoke+currdrink+agec+(1|year)+(1|cohort), data=dat)
fit_in1.mer1<- lmer(bmiz~ race_eth+educ+male+currsmoke+currdrink+(1|agec)+(1|year)+(1|cohort), data=dat)
#INLA parameterizes variances on the log-precision scale, so if we take log(1/variance), we can get that number and feed it to INLA
vc0<-as.data.frame(VarCorr(fit_in1.mer0))
vc1<-as.data.frame(VarCorr(fit_in1.mer1))
vc0$init<-log(1/vc0$vcov); vc0
## grp var1 var2 vcov sdcor init
## 1 cohort (Intercept) <NA> 0.001480208 0.03847348 6.51557246
## 2 year (Intercept) <NA> 0.005698834 0.07549062 5.16749364
## 3 Residual <NA> <NA> 0.920389201 0.95936917 0.08295865
vc1$init<-log(1/vc1$vcov); vc1
## grp var1 var2 vcov sdcor init
## 1 cohort (Intercept) <NA> 0.00170954 0.04134659 6.37153073
## 2 year (Intercept) <NA> 0.00578871 0.07608357 5.15184586
## 3 agec (Intercept) <NA> 0.04411133 0.21002697 3.12103867
## 4 Residual <NA> <NA> 0.92036819 0.95935822 0.08298148
#Feed the init values to the hyperparameter inits in each f()
#Age as fixed
fit_in0<- inla(bmiz~ race_eth+educ+male+currsmoke+currdrink+agec+f(year, model="iid",hyper = list(prec=list(initial=vc0$init[2])) )+f(cohort, model="iid",hyper = list(prec=list(initial=vc0$init[1]))), family = "gaussian", data=dat, num.threads = 1, verbose = F, control.inla=list(strategy='gaussian'), control.family = list(hyper = list(prec=list(initial=vc0$init[3]))), control.compute = list(dic=T))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 2991469 159.8 5684620 303.6 5684620 303.6
## Vcells 33128471 252.8 101374715 773.5 95201369 726.4
#Age as unstructured random effect
fit_in1<- inla(bmiz~ race_eth+educ+male+currsmoke+currdrink+f(agec, model="iid",hyper = list(prec=list(initial=vc1$init[3])))+f(year, model="iid",hyper = list(prec=list(initial=vc1$init[2])) )+f(cohort, model="iid",hyper = list(prec=list(initial=vc1$init[1]))), family = "gaussian", data=dat, num.threads = 1, verbose = F, control.inla=list(strategy='gaussian'), control.family = list(hyper = list(prec=list(initial=vc1$init[4]))), control.compute = list(dic=T))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3743829 200.0 5684620 303.6 5684620 303.6
## Vcells 48308034 368.6 101374715 773.5 101257573 772.6
#Age and year as smooth random effect
fit_in2<- inla(bmiz~ race_eth+educ+male+currsmoke+currdrink+f(agec, model="rw2",hyper = list(prec=list(initial=vc1$init[3])))+f(year, model="rw1",hyper = list(prec=list(initial=vc1$init[2])) )+f(cohort, model="iid",hyper = list(prec=list(initial=vc1$init[1]))), family = "gaussian", data=dat, num.threads = 1, verbose = F, control.inla=list(strategy='gaussian'), control.family = list(hyper = list(prec=list(initial=vc1$init[4]))), control.compute = list(dic=T))
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 4496243 240.2 6861544 366.5 6861544 366.5
## Vcells 63488287 484.4 121729658 928.8 121540928 927.3
summary(fit_in0)
##
## Call:
## c("inla(formula = bmiz ~ race_eth + educ + male + currsmoke + currdrink + ", " agec + f(year, model = \"iid\", hyper = list(prec = list(initial = vc0$init[2]))) + ", " f(cohort, model = \"iid\", hyper = list(prec = list(initial = vc0$init[1]))), ", " family = \"gaussian\", data = dat, verbose = F, control.compute = list(dic = T), ", " control.family = list(hyper = list(prec = list(initial = vc0$init[3]))), ", " control.inla = list(strategy = \"gaussian\"), num.threads = 1)" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 3.9665 225.9761 4.9855 234.9281
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -0.4245 0.0337 -0.4894 -0.4250 -0.3567 -0.4260 0
## race_ethNHAsian -0.4906 0.0189 -0.5278 -0.4906 -0.4535 -0.4906 0
## race_ethNHBlack 0.1762 0.0129 0.1509 0.1762 0.2015 0.1762 0
## race_ethNHNAHN 0.2001 0.0439 0.1138 0.2001 0.2862 0.2001 0
## race_ethNHWhite -0.0924 0.0104 -0.1129 -0.0924 -0.0719 -0.0924 0
## educhs -0.0360 0.0115 -0.0585 -0.0360 -0.0136 -0.0360 0
## educsomecoll -0.0512 0.0118 -0.0744 -0.0512 -0.0280 -0.0512 0
## educcolledu -0.2777 0.0127 -0.3025 -0.2777 -0.2528 -0.2777 0
## male 0.1252 0.0071 0.1112 0.1252 0.1392 0.1252 0
## currsmoke -0.1662 0.0091 -0.1840 -0.1662 -0.1483 -0.1662 0
## currdrinkformer 0.0658 0.0078 0.0505 0.0658 0.0811 0.0658 0
## currdrinknever 0.0000 31.6228 -62.0862 -0.0009 62.0344 0.0000 0
## agec(20,25 0.2460 0.0237 0.1994 0.2461 0.2923 0.2461 0
## agec(25,30 0.4144 0.0264 0.3622 0.4145 0.4657 0.4147 0
## agec(30,35 0.5038 0.0286 0.4468 0.5041 0.5590 0.5047 0
## agec(35,40 0.5824 0.0298 0.5226 0.5830 0.6394 0.5841 0
## agec(40,45 0.6157 0.0300 0.5549 0.6164 0.6726 0.6179 0
## agec(45,50 0.6442 0.0304 0.5823 0.6450 0.7017 0.6467 0
## agec(50,55 0.6862 0.0312 0.6225 0.6872 0.7449 0.6891 0
## agec(55,60 0.6912 0.0319 0.6259 0.6922 0.7511 0.6943 0
## agec(60,65 0.6895 0.0327 0.6224 0.6905 0.7509 0.6927 0
## agec(65,70 0.6375 0.0330 0.5699 0.6385 0.6999 0.6403 0
## agec(70,75 0.5437 0.0334 0.4760 0.5444 0.6075 0.5458 0
## agec(75,80 0.4378 0.0345 0.3684 0.4383 0.5045 0.4391 0
## agec(80,85 0.1653 0.0345 0.0964 0.1656 0.2326 0.1661 0
## agec(85,90 0.0000 31.6228 -62.0862 -0.0009 62.0344 0.0000 0
## agec(90,95 0.0000 31.6228 -62.0862 -0.0009 62.0344 0.0000 0
## agec(95,100 0.0000 31.6228 -62.0862 -0.0009 62.0344 0.0000 0
##
## Random effects:
## Name Model
## year IID model
## cohort IID model
##
## Model hyperparameters:
## mean sd 0.025quant
## Precision for the Gaussian observations 1.087 0.0056 1.075
## Precision for year 203.659 77.5873 90.247
## Precision for cohort 1242.642 897.0037 311.330
## 0.5quant 0.975quant mode
## Precision for the Gaussian observations 1.087 1.097 1.087
## Precision for year 191.125 389.884 167.979
## Precision for cohort 999.912 3608.584 676.178
##
## Expected number of effective parameters(std dev): 49.30(1.534)
## Number of equivalent replicates : 1521.25
##
## Deviance Information Criterion (DIC) ...: 206673.42
## Effective number of parameters .........: 51.35
##
## Marginal log-Likelihood: -103540.07
## Posterior marginals for linear predictor and fitted values computed
summary(fit_in1)
##
## Call:
## c("inla(formula = bmiz ~ race_eth + educ + male + currsmoke + currdrink + ", " f(agec, model = \"iid\", hyper = list(prec = list(initial = vc1$init[3]))) + ", " f(year, model = \"iid\", hyper = list(prec = list(initial = vc1$init[2]))) + ", " f(cohort, model = \"iid\", hyper = list(prec = list(initial = vc1$init[1]))), ", " family = \"gaussian\", data = dat, verbose = F, control.compute = list(dic = T), ", " control.family = list(hyper = list(prec = list(initial = vc1$init[4]))), ", " control.inla = list(strategy = \"gaussian\"), num.threads = 1)" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 2.9191 515.0973 7.2873 525.3036
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.0645 0.0610 -0.0562 0.0644 0.1852 0.0644 0
## race_ethNHAsian -0.4905 0.0189 -0.5277 -0.4905 -0.4534 -0.4905 0
## race_ethNHBlack 0.1763 0.0129 0.1510 0.1763 0.2016 0.1763 0
## race_ethNHNAHN 0.2001 0.0439 0.1139 0.2001 0.2863 0.2001 0
## race_ethNHWhite -0.0923 0.0104 -0.1128 -0.0923 -0.0719 -0.0923 0
## educhs -0.0360 0.0115 -0.0585 -0.0360 -0.0135 -0.0360 0
## educsomecoll -0.0512 0.0118 -0.0744 -0.0512 -0.0280 -0.0512 0
## educcolledu -0.2774 0.0127 -0.3022 -0.2774 -0.2525 -0.2774 0
## male 0.1252 0.0071 0.1112 0.1252 0.1392 0.1252 0
## currsmoke -0.1660 0.0091 -0.1839 -0.1660 -0.1482 -0.1660 0
## currdrinkformer 0.0657 0.0078 0.0504 0.0657 0.0810 0.0657 0
## currdrinknever 0.0000 31.6228 -62.0862 -0.0009 62.0344 0.0000 0
##
## Random effects:
## Name Model
## agec IID model
## year IID model
## cohort IID model
##
## Model hyperparameters:
## mean sd 0.025quant
## Precision for the Gaussian observations 1.087 0.0056 1.075
## Precision for agec 26.039 9.8518 11.470
## Precision for year 201.456 77.1041 89.283
## Precision for cohort 1042.191 725.8704 262.904
## 0.5quant 0.975quant mode
## Precision for the Gaussian observations 1.087 1.098 1.087
## Precision for agec 24.520 49.602 21.661
## Precision for year 188.795 387.007 165.552
## Precision for cohort 851.253 2949.053 582.223
##
## Expected number of effective parameters(std dev): 49.58(1.433)
## Number of equivalent replicates : 1512.59
##
## Deviance Information Criterion (DIC) ...: 206671.66
## Effective number of parameters .........: 51.66
##
## Marginal log-Likelihood: -103489.79
## Posterior marginals for linear predictor and fitted values computed
summary(fit_in2) #lowest DIC = best fitting model
##
## Call:
## c("inla(formula = bmiz ~ race_eth + educ + male + currsmoke + currdrink + ", " f(agec, model = \"rw2\", hyper = list(prec = list(initial = vc1$init[3]))) + ", " f(year, model = \"rw1\", hyper = list(prec = list(initial = vc1$init[2]))) + ", " f(cohort, model = \"iid\", hyper = list(prec = list(initial = vc1$init[1]))), ", " family = \"gaussian\", data = dat, verbose = F, control.compute = list(dic = T), ", " control.family = list(hyper = list(prec = list(initial = vc1$init[4]))), ", " control.inla = list(strategy = \"gaussian\"), num.threads = 1)" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 3.4420 820.5483 9.5514 833.5418
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -0.0751 0.0289 -0.1340 -0.0746 -0.0195 -0.0735 0
## race_ethNHAsian -0.4910 0.0189 -0.5281 -0.4910 -0.4539 -0.4910 0
## race_ethNHBlack 0.1762 0.0129 0.1509 0.1762 0.2014 0.1762 0
## race_ethNHNAHN 0.1995 0.0439 0.1133 0.1995 0.2856 0.1995 0
## race_ethNHWhite -0.0923 0.0104 -0.1128 -0.0923 -0.0718 -0.0923 0
## educhs -0.0359 0.0115 -0.0584 -0.0359 -0.0135 -0.0359 0
## educsomecoll -0.0512 0.0118 -0.0744 -0.0512 -0.0280 -0.0512 0
## educcolledu -0.2774 0.0126 -0.3022 -0.2774 -0.2526 -0.2774 0
## male 0.1252 0.0071 0.1112 0.1252 0.1392 0.1252 0
## currsmoke -0.1658 0.0091 -0.1836 -0.1658 -0.1479 -0.1658 0
## currdrinkformer 0.0655 0.0078 0.0502 0.0655 0.0808 0.0655 0
## currdrinknever 0.0000 31.6228 -62.0862 -0.0009 62.0344 0.0000 0
##
## Random effects:
## Name Model
## agec RW2 model
## year RW1 model
## cohort IID model
##
## Model hyperparameters:
## mean sd 0.025quant
## Precision for the Gaussian observations 1.091 0.0035 1.086
## Precision for agec 438.808 175.4759 195.067
## Precision for year 1897.721 921.9709 721.513
## Precision for cohort 623.439 204.5413 244.916
## 0.5quant 0.975quant mode
## Precision for the Gaussian observations 1.09 1.099 1.088
## Precision for agec 406.01 872.506 348.516
## Precision for year 1696.34 4242.172 1368.156
## Precision for cohort 627.09 1009.581 724.702
##
## Expected number of effective parameters(std dev): 42.99(1.502)
## Number of equivalent replicates : 1744.54
##
## Deviance Information Criterion (DIC) ...: 206659.87
## Effective number of parameters .........: 43.84
##
## Marginal log-Likelihood: -103456.86
## Posterior marginals for linear predictor and fitted values computed
Plot some of the effects:
par(mfrow=(c(2,1)))
plot( fit_in1$summary.random$agec$mean, type="l",xaxt="n", ylab="Age Random Effect (IID)", main="Age random effect- IID specification", ylim=c(-1,1))
axis(1, at=1:17, labels=levels(dat$agec))
lines(fit_in1$summary.random$agec$`0.025quant`, col=1, lty=5)
lines(fit_in1$summary.random$agec$`0.975quant`, col=1, lty=5)
abline(h=0, lwd=2)
legend("topright", legend = c("Posterior Mean", "95% Credible Interval"), lty=c(1,5), cex=.75)
plot( fit_in2$summary.random$agec$mean, type="l",xaxt="n", ylab="Age Random Effect (RW2)", main="Age random effect - RW2 specification", ylim=c(-1,1))
axis(1, at=1:17, labels=levels(dat$agec))
lines(fit_in2$summary.random$agec$`0.025quant`, col=1, lty=5)
lines(fit_in2$summary.random$agec$`0.975quant`, col=1, lty=5)
abline(h=0, lwd=2)
legend("topright", legend = c("Posterior Mean", "95% Credible Interval"), lty=c(1,5), cex=.75)
plot(fit_in1$summary.random$year$mean, type="l",xaxt="n", ylab="Period Random Effect (IID)", main="Period random effect- IID specification", ylim=c(-.3, .3))
yrs<-unique(dat$year); yrs<-yrs[order(yrs)]
axis(1, at=1:length(yrs), labels=yrs)
lines(fit_in1$summary.random$year$`0.025quant`, col=1, lty=5)
lines(fit_in1$summary.random$year$`0.975quant`, col=1, lty=5)
abline(h=0, lwd=2)
legend("topright", legend = c("Posterior Mean", "95% Credible Interval"), lty=c(1,5), cex=.75)
plot(fit_in2$summary.random$year$mean, type="l",xaxt="n", ylab="Period Random Effect (RW1)", main="Period random effect- RW1 specification", ylim=c(-.3, .3))
yrs<-unique(dat$year); yrs<-yrs[order(yrs)]
axis(1, at=1:length(yrs), labels=yrs)
lines(fit_in2$summary.random$year$`0.025quant`, col=1, lty=5)
lines(fit_in2$summary.random$year$`0.975quant`, col=1, lty=5)
abline(h=0, lwd=2)
legend("topright", legend = c("Posterior Mean", "95% Credible Interval"), lty=c(1,5), cex=.75)
par(mfrow=c(1,1))
plot(fit_in1$summary.random$cohort$mean, type="l", xaxt="n", ylab="Cohort Random Effect (IID)", main="Cohort random effect", ylim=c(-.2, .2))
coh<-unique(dat$cohort); coh<-coh[order(coh)]
axis(1, at=1:length(coh), labels=coh)
lines(fit_in1$summary.random$cohort$`0.025quant`, col=1, lty=5)
lines(fit_in1$summary.random$cohort$`0.975quant`, col=1, lty=5)
abline(h=0, lwd=2)
legend("topright", legend = c("Posterior Mean", "95% Credible Interval"), lty=c(1,5), cex=.75)
Plot the marginals for the variance terms
m1<- inla.tmarginal(
function(x) 1/x,
fit_in2$marginals.hyperpar$`Precision for agec`)
m2<- inla.tmarginal(
function(x) 1/x,
fit_in2$marginals.hyperpar$`Precision for year`)
m3<- inla.tmarginal(
function(x) 1/x,
fit_in2$marginals.hyperpar$`Precision for cohort`)
plot(m1, type="l", main="Posterior distibution for age variance")
plot(m2, type="l", main="Posterior distibution for year variance")
plot(m3, type="l", main="Posterior distibution for cohort variance")