Bayesian Multi-level Regression Models Using INLA

Last time, we saw how to use INLA to fit a Bayesian regression model to areal data (US Counties). This example will focus on how to use INLA to fit a Bayesian multi-level model, where our outcome is observed at the individual level, and we may or may not have information avaialble at a higher level of aggregation. This could include county or city of residence, or some other geography.

This example shows how to: * Examine variation between groups using fixed effects * Fit the basic random intercept model : +\(y_{ij} = \mu + u_{j} + e_{ij}\) with \(u_j \sim N(0, \sigma^2)\) Where the intercepts (\(u_j\)) for each group vary randomly around the overall mean (\(\mu\))

*I also illustrate how to include group-level covariates and how to fit the random slopes model and the model for cross level interactions

The example merges data from the 2011 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART county data. Link and the 2010 American Community Survey 5-year estimates at the county level. More details on these data are found below.

library(INLA)
library(car)
#load brfss
load("~/Google Drive/dem7283/data/brfss_11.Rdata")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)

brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")

#bmi
brfss_11$bmi<-ifelse(is.na(brfss_11$bmi5)==T, NA, brfss_11$bmi5/100)
#poor or fair health
brfss_11$badhealth<-ifelse(brfss_11$genhlth %in% c(4,5),1,0)

#race
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=T)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=T)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=T)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=T)

#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)

#employment
brfss_11$employ<-recode(brfss_11$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor.result=T)
brfss_11$employ<-relevel(brfss_11$employ, ref='Employed')

#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')

#income
brfss_11$inc<-as.factor(ifelse(brfss_11$incomg==9, NA, brfss_11$incomg))

#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))

Higher level predictors

We will often be interested in factors at both the individual AND contextual levels. To illustrate this, I will use data from the American Community Survey measured at the county level. Specifically, I use the DP3 table, which provides economic characteristics of places, from the 2010 5 year ACS Link.

#I will also add in some Census variables from the ACS 2010 5 Year estimates
#load in ACS data from Factfinder
acsecon<-read.csv("~/Google Drive/dem7283/data/aff_download/ACS_10_5YR_DP03_with_ann.csv")
acsecon$povrate<-acsecon[, "HC03_VC156"]
acsecon$unemployed<-acsecon[, "HC03_VC13"]
acsecon$cofips<-substr(acsecon$GEO.id, 10,14)
acsecon$povz<-scale(acsecon$povrate, center=T, scale=T)
acsecon$unempz<-scale(acsecon$unemployed, center=T, scale=T)
acsecon<-acsecon[, c("cofips", "povrate","povz", "unemployed","unempz")]

head(acsecon)

##   cofips povrate       povz unemployed       unempz
## 1  01001     7.5 -0.6960015        6.2 -0.396270981
## 2  01003     9.1 -0.4069846        6.6 -0.277120527
## 3  01005    19.9  1.5438794        9.6  0.616507876
## 4  01007     9.4 -0.3527939        9.1  0.467569809
## 5  01009    10.0 -0.2444126        7.5 -0.009032006
## 6  01011    22.6  2.0315954       11.2  1.093109691

joindata<-merge(brfss_11, acsecon, by="cofips", all.x=T, all.y=F)

## Warning in merge.data.frame(brfss_11, acsecon, by = "cofips", all.x = T, :
## column names 'cholchk', 'mrace' are duplicated in the result

#and merge the data back to the kids data
joindata$bmiz<-scale(joindata$bmi, center=T, scale=T)
joindata$agez<-scale(joindata$age, center=T, scale=T)

So now we have our multi-level dataset. Now is the tricky part. We need a shapefile to get our county geographies, but we only need the counties in the shapefile that match the counties in the BRFSS data. INLA will fail if we have counties in our neighbor list that aren’t in the BRFSS data, which are a lot, becuase in 2011 the BRFSS only had 225 counties.

This next part of the code gets only the counties from the shapefile that match counties in the BRFSS.

#####read in us shapefile#####
library(maptools); library(spdep); gpclibPermit()

## Checking rgeos availability: TRUE

## [1] FALSE

uscos<-readShapePoly("/Users/ozd504/Google Drive/dem7263/data/co99_d00.shp")
uscos@data$cofips<-as.character(paste(uscos$STATE, uscos$COUNTY, sep=""))
uscos2<-unionSpatialPolygons(uscos, IDs = uscos@data$cofips)
uscos2<-as(uscos2, "SpatialPolygonsDataFrame")
uscos2@data$cofips<-as.character(row.names(uscos2))
uscos2$state<-substr(uscos2$cofips, 1,2)

#See what counties are in the brfss data
brfco<-unique(brfss_11$cofips)

cob<-uscos2[uscos2$cofips%in% brfco,]
cob$struct<-1:length(cob$cofips)
plot(cob)
nbs<-knearneigh(coordinates(cob), longlat = T, k = 4)
nbs<-knn2nb(nbs, row.names = cob$struct, sym = T)
wts<-nb2listw(nbs)
plot(cob)
plot(wts, coords=coordinates(cob), add=T, col=2)

nb2INLA(file="/Users/ozd504/Google Drive/dem7263/data/us.gra", nbs)

mdat2<-cob@data

indat<-merge(joindata, mdat2, by.x="cofips", by.y="cofips", all.x=T)

## Warning in merge.data.frame(joindata, mdat2, by.x = "cofips", by.y =
## "cofips", : column names 'cholchk', 'mrace' are duplicated in the result

indat<-indat[order(indat$struct),]
indat2<-indat[complete.cases(indat[,c("bmi", "badhealth", "black", "educa", "agez")] ),]
length(unique(indat2$cofips))

## [1] 224

Now we have an analytical dataset.

Below, I will fit a Null model, with only a random intercept for county first. This is like fitting a model to estimate the coutny means of the BMI variable

fit0<-inla(bmiz~1+f(struct, model="iid"), family = "gaussian", data=indat2, num.threads = 2, verbose = F,  control.inla=list(strategy='gaussian', int.strategy='eb'), control.compute = list( dic=T) )
summary(fit0)

## 
## Call:
## c("inla(formula = bmiz ~ 1 + f(struct, model = \"iid\"), family = \"gaussian\", ",  "    data = indat2, verbose = F, control.compute = list(dic = T), ",  "    control.inla = list(strategy = \"gaussian\", int.strategy = \"eb\"), ",  "    num.threads = 2)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          1.7780        134.1774         18.1473        154.1027 
## 
## Fixed effects:
##               mean     sd 0.025quant 0.5quant 0.975quant   mode kld
## (Intercept) 0.0112 0.0077     -0.004   0.0112     0.0264 0.0112   0
## 
## Random effects:
## Name   Model
##  struct   IID model 
## 
## Model hyperparameters:
##                                           mean    sd 0.025quant 0.5quant
## Precision for the Gaussian observations  1.011 0.003      1.005    1.011
## Precision for struct                    82.387 8.571     66.667   82.009
##                                         0.975quant   mode
## Precision for the Gaussian observations      1.017  1.011
## Precision for struct                       100.333 81.321
## 
## Expected number of effective parameters(std dev): 206.72(0.00)
## Number of equivalent replicates : 1098.72 
## 
## Deviance Information Criterion (DIC) ...: 642290.76
## Effective number of parameters .........: 206.72
## 
## Marginal log-Likelihood:  -321344.96 
## Posterior marginals for linear predictor and fitted values computed

1/fit0$summary.hyperpar$mean

## [1] 0.9891644 0.0121379

m<- inla.tmarginal(
        function(x) 1/x,
        fit0$marginals.hyperpar$'Precision for struct')

plot(m, type="l", main=c("Posterior distibution for between county variance", "- Null Model -"))

That is our Null model. Now I will fit a model that includes individual level demographic characteristics:

fit1<-inla(bmiz~black+hispanic+other+lths+coll+ agez+f(struct, model="iid"), family = "gaussian", data=indat2, num.threads = 2, verbose = F, control.inla=list(strategy='gaussian', int.strategy='eb'), control.compute = list(dic=T ))

1/fit1$summary.hyperpar$mean

## [1] 0.971686814 0.009878792

m1<- inla.tmarginal(
        function(x) 1/x,
        fit1$marginals.hyperpar$'Precision for struct')

plot(m1, type="l", main=c("Posterior distibution for between county variance", "- Random Intecept Model -"))

Next a model with a county level predictor, the z-scored poverty rate

fit2<-inla(bmiz~black+hispanic+other+lths+coll+ agez+povz+f(struct, model="iid"), family = "gaussian", data=indat2, num.threads = 2, verbose = F, control.inla=list(strategy='gaussian', int.strategy='eb'), control.compute = list(dic=T ))

summary(fit2)

## 
## Call:
## c("inla(formula = bmiz ~ black + hispanic + other + lths + coll + ",  "    agez + povz + f(struct, model = \"iid\"), family = \"gaussian\", ",  "    data = indat2, verbose = F, control.compute = list(dic = T), ",  "    control.inla = list(strategy = \"gaussian\", int.strategy = \"eb\"), ",  "    num.threads = 2)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          2.1030        216.5067         18.2991        236.9088 
## 
## Fixed effects:
##                mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept)  0.0370 0.0091     0.0191   0.0370     0.0548  0.0370   0
## black1       0.4057 0.0080     0.3900   0.4057     0.4214  0.4057   0
## hispanic1    0.1736 0.0087     0.1565   0.1736     0.1907  0.1736   0
## other1      -0.0308 0.0089    -0.0483  -0.0308    -0.0133 -0.0308   0
## lths         0.0628 0.0090     0.0452   0.0628     0.0803  0.0628   0
## coll        -0.1038 0.0049    -0.1134  -0.1038    -0.0943 -0.1038   0
## agez         0.0084 0.0022     0.0041   0.0084     0.0126  0.0084   0
## povz         0.0201 0.0104    -0.0003   0.0201     0.0405  0.0201   0
## 
## Random effects:
## Name   Model
##  struct   IID model 
## 
## Model hyperparameters:
##                                            mean      sd 0.025quant
## Precision for the Gaussian observations   1.029  0.0031      1.023
## Precision for struct                    102.582 10.9944     82.510
##                                         0.5quant 0.975quant    mode
## Precision for the Gaussian observations    1.029      1.035   1.029
## Precision for struct                     102.062    125.697 101.095
## 
## Expected number of effective parameters(std dev): 208.79(0.00)
## Number of equivalent replicates : 1087.84 
## 
## Deviance Information Criterion (DIC) ...: 638239.64
## Effective number of parameters .........: 208.79
## 
## Marginal log-Likelihood:  -319353.92 
## Posterior marginals for linear predictor and fitted values computed

1/fit2$summary.hyperpar$mean

## [1] 0.971686741 0.009748336

m1<- inla.tmarginal(
        function(x) 1/x,
        fit1$marginals.hyperpar$'Precision for struct')

plot(m1, type="l", main=c("Posterior distibution for between county variance", "- Multi-Level Model -"))

Finally, we model the county level means using the Besag-York and Mollie model that we had used on the areal data last week.

fit3<-inla(bmiz~black+hispanic+other+lths+coll+ agez+povz+f(struct, model="bym", graph="/Users/ozd504/Google Drive/dem7263/data/us.gra",hyper = list(prec.unstruct=list(initial=5.3), prec.spatial=list(initial=5.3))), family = "gaussian", data=indat2, num.threads = 1, verbose = F, control.inla=list(strategy='gaussian', int.strategy='eb'), control.compute = list(dic=T), control.family = list(hyper = list(prec=list(initial= .028))))

## Warning in INLA::f(struct, model = "bym", graph = "/Users/ozd504/Google
## Drive/dem7263/data/us.gra", : The graph for the model bym has 3 connected
## components!!! Model is revised accordingly.

summary(fit3)

## 
## Call:
## c("inla(formula = bmiz ~ black + hispanic + other + lths + coll + ",  "    agez + povz + f(struct, model = \"bym\", graph = \"/Users/ozd504/Google Drive/dem7263/data/us.gra\", ",  "    hyper = list(prec.unstruct = list(initial = 5.3), prec.spatial = list(initial = 5.3))), ",  "    family = \"gaussian\", data = indat2, verbose = F, control.compute = list(dic = T), ",  "    control.family = list(hyper = list(prec = list(initial = 0.028))), ",  "    control.inla = list(strategy = \"gaussian\", int.strategy = \"eb\"), ",  "    num.threads = 1)")
## 
## Time used:
##  Pre-processing    Running inla Post-processing           Total 
##          2.3207        404.1688         24.5839        431.0734 
## 
## Fixed effects:
##                mean     sd 0.025quant 0.5quant 0.975quant    mode kld
## (Intercept)  0.0394 0.0076     0.0245   0.0394     0.0543  0.0394   0
## black1       0.4062 0.0080     0.3905   0.4062     0.4219  0.4062   0
## hispanic1    0.1770 0.0088     0.1598   0.1770     0.1942  0.1770   0
## other1      -0.0287 0.0089    -0.0463  -0.0287    -0.0112 -0.0287   0
## lths         0.0629 0.0090     0.0453   0.0629     0.0805  0.0629   0
## coll        -0.1038 0.0049    -0.1133  -0.1038    -0.0942 -0.1038   0
## agez         0.0085 0.0022     0.0042   0.0085     0.0128  0.0085   0
## povz         0.0273 0.0094     0.0089   0.0273     0.0457  0.0273   0
## 
## Random effects:
## Name   Model
##  struct   BYM model 
## 
## Model hyperparameters:
##                                             mean      sd 0.025quant
## Precision for the Gaussian observations    1.034  0.0062      1.026
## Precision for struct (iid component)     197.979  9.6364    178.107
## Precision for struct (spatial component) 176.243 42.4627     91.427
##                                          0.5quant 0.975quant    mode
## Precision for the Gaussian observations     1.033      1.049   1.027
## Precision for struct (iid component)      198.143    217.143 198.549
## Precision for struct (spatial component)  179.131    250.189 203.974
## 
## Expected number of effective parameters(std dev): 198.04(0.00)
## Number of equivalent replicates : 1146.86 
## 
## Deviance Information Criterion (DIC) ...: 638228.92
## Effective number of parameters .........: 198.06
## 
## Marginal log-Likelihood:  -319265.25 
## Posterior marginals for linear predictor and fitted values computed

1/fit3$summary.hyperpar$mean

## [1] 0.967225688 0.005051032 0.005673974

m3<- inla.tmarginal(
        function(x) 1/x,
        fit3$marginals.hyperpar$`Precision for struct (iid component)`)
m32<- inla.tmarginal(
        function(x) 1/x,
        fit3$marginals.hyperpar$`Precision for struct (spatial component)`)

plot(m3, type="l", main=c("Posterior distibution for between county variance", "- Multi-level BYM Model IID-"))

plot(m32, type="l", main=c("Posterior distibution for between county variance", "- Multi-level BYM Model Spatial-"))

Bayesian Multi-level Regression Models Using INLA

Corey S. Sparks, Ph.D.

December 2, 2015

Higher level predictors