In this example, I introduce how to fit the multi-level model using the lme4 package. This example considers the linear case of the model, where the outcome is assumed to be continuous, and the model error term is assumed to be Gaussian. Subsequent examples will highlight the Generalized Linear Mixed Model (GLMM).

This example shows how to: * Examine variation between groups using fixed effects * Fit the basic random intercept model : +\(y_{ij} = \mu + u_{j} + e_{ij}\) with \(u_j \sim N(0, \sigma^2)\) Where the intercepts (\(u_j\)) for each group vary randomly around the overall mean (\(\mu\))

*I also illustrate how to include group-level covariates and how to fit the random slopes model and the model for cross level interactions

The example merges data from the 2011 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART county data. Link and the 2010 American Community Survey 5-year estimates at the county level. More details on these data are found below.

library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(arm)
## Loading required package: MASS
## 
## arm (Version 1.7-07, built: 2014-8-27)
## 
## Working directory is /Users/ozd504/Google Drive/dem7283/Rcode/Rcode15
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer
## 
## The following object is masked from 'package:stats':
## 
##     step
library(car)
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:arm':
## 
##     logit
library(MASS)
library(ggplot2)

#load brfss
load("~/Google Drive/dem7283/data/brfss_11.Rdata")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)

brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")

#just for brevity, I just select TX respondents with non missing weights
brfss_11<-brfss_11[is.na(brfss_11$cntywt)==F,]
#insurance
brfss_11$ins<-ifelse(brfss_11$hlthpln1==1,1,0)
#smoking currently
brfss_11$smoke<-recode(brfss_11$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#low physical activity
brfss_11$lowact<-recode(brfss_11$pacat, recodes="1:2=0; 3:4=1; else=NA")
#high blood pressure
brfss_11$hbp<-recode(brfss_11$bphigh4, recodes="1=1; 2:4=0; else=NA")
#high cholesterol
brfss_11$hc<-recode(brfss_11$toldhi2, recodes="1=1; 2=0; else=NA")
#bmi
brfss_11$bmi<-ifelse(is.na(brfss_11$bmi5)==T, NA, brfss_11$bmi5/100)
#poor or fair health
brfss_11$badhealth<-ifelse(brfss_11$genhlth %in% c(4,5),1,0)

#race
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=T)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=T)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=T)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=T)

#have a personal doctor?
brfss_11$doc<-recode(brfss_11$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor.result=F)

#needed care in last year but couldn't get it because of cost
brfss_11$medacc<-recode(brfss_11$medcost, recodes="1=1;2=0;else=NA")

#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)

#employment
brfss_11$employ<-recode(brfss_11$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor.result=T)
brfss_11$employ<-relevel(brfss_11$employ, ref='Employed')

#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')

#income
brfss_11$inc<-as.factor(ifelse(brfss_11$incomg==9, NA, brfss_11$incomg))

#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))

I want to see how many people we have in each county in the data:

#Now we will begin fitting the multilevel regression model with the county
#that the person lives in being the higher level
table(brfss_11$cofips)
## 
## 01073 01097 02020 02090 02170 04013 04019 05119 06001 06013 06037 06059 
##   769   600   712   560   551  1626   844   670   745   580  3209  1347 
## 06065 06067 06071 06073 06085 08001 08005 08013 08031 08035 08041 08059 
##  1037   750   946  1689   834   999  1063   594  1103   687  1238  1401 
## 08069 08123 09001 09003 09009 10001 10003 10005 11001 12086 13089 13121 
##   680   563  1650  2113  1482  1415  2030  1332  4560   719   568   637 
## 15001 15003 15007 15009 16001 16027 17031 18089 18097 19113 19153 20045 
##  1477  3830   674  1625   850   521  1607   889  1332   636   965   769 
## 20091 20173 20177 20209 21111 22019 22033 22113 23001 23003 23005 23007 
##  3346  3367  1322  1163  1987   593   624   515   842   744  2263   512 
## 23009 23011 23013 23015 23017 23019 23027 23029 23031 24003 24005 24021 
##   601  1114   660   650   554  1195   615   628  1578   704  1091   591 
## 24031 24033 24510 25001 25005 25009 25013 25017 25021 25023 25025 25027 
##  1222   949   648   519  2853  2706  2080  4311  1824  1916  2317  2720 
## 26081 26125 26163 27003 27037 27053 27123 27137 27163 29095 29189 29510 
##   753   917  1878   727   879  4152  2274   532   536   678   700   535 
## 30013 30029 30031 30041 30047 30049 30063 30111 31001 31019 31043 31055 
##   709   713   589   561   903   655   792  1032   562   518   932  4414 
## 31079 31109 31111 31119 31141 31153 31157 31173 32003 32031 33005 33009 
##   731  2523   639   521   608  1166   864   529  2217  1650   525   504 
## 33011 33013 33015 33017 34001 34003 34005 34007 34009 34013 34015 34017 
##  1605   714  1051   638  1075   887   712   804   613  1371   578  1275 
## 34019 34021 34023 34025 34027 34029 34031 34035 34037 34039 34041 35001 
##   581   629   850   723   835   661   633   657   578   698   578  1919 
## 35013 35043 35045 35049 35061 36047 36061 36081 37063 37081 37119 37183 
##   739   736   750   804   507  1037  1058   797   540   639   686   577 
## 38015 38017 39035 39049 39061 39095 39099 39113 39151 39153 40027 40109 
##   703   947   753   724   724   660   664   670   674   678   501  1503 
## 40143 41005 41039 41051 41067 42003 42101 44003 44007 44009 45003 45013 
##  1730   559   659  1085   712  1395  1481   983  3978   795   614   862 
## 45019 45045 45051 45075 45079 45083 46011 46013 46029 46065 46081 46099 
##   967   868   807   535   913   590   500   526   508   541   533   771 
## 46103 48029 48133 48157 48201 48303 48329 48423 48439 48453 49011 49035 
##   651  1057   608   943  1503   755   543   570   570  1045  1169  4201 
## 49045 49049 49051 49057 50007 50021 50023 50025 50027 53011 53033 53053 
##   611  1658   502  1020  1542   734   677   565   687   644  3336   976 
## 53061 53063 53067 54039 55079 56013 56021 56025 
##   889  1322   500   640  1128   506  1121   861
#people within each county

#How many total counties are in the data?
length(table(brfss_11$cofips))
## [1] 224
#counties

Higher level predictors

We will often be interested in factors at both the individual AND contextual levels. To illustrate this, I will use data from the American Community Survey measured at the county level. Specifically, I use the DP3 table, which provides economic characteristics of places, from the 2010 5 year ACS Link.

#I will also add in some Census variables from the ACS 2010 5 Year estimates
#load in ACS data from Factfinder
acsecon<-read.csv("~/Google Drive/dem7283/data/aff_download/ACS_10_5YR_DP03_with_ann.csv")
acsecon$povrate<-acsecon[, "HC03_VC156"]
acsecon$unemployed<-acsecon[, "HC03_VC13"]
acsecon$cofips<-substr(acsecon$GEO.id, 10,14)
acsecon$povz<-scale(acsecon$povrate, center=T, scale=T)
acsecon$unempz<-scale(acsecon$unemployed, center=T, scale=T)
acsecon<-acsecon[, c("cofips", "povrate","povz", "unemployed","unempz")]

head(acsecon)
##   cofips povrate       povz unemployed       unempz
## 1  01001     7.5 -0.6960015        6.2 -0.396270981
## 2  01003     9.1 -0.4069846        6.6 -0.277120527
## 3  01005    19.9  1.5438794        9.6  0.616507876
## 4  01007     9.4 -0.3527939        9.1  0.467569809
## 5  01009    10.0 -0.2444126        7.5 -0.009032006
## 6  01011    22.6  2.0315954       11.2  1.093109691
joindata<-merge(brfss_11, acsecon, by="cofips", all.x=T, all.y=F)
## Warning in merge.data.frame(brfss_11, acsecon, by = "cofips", all.x = T, :
## column names 'cholchk', 'mrace' are duplicated in the result
#and merge the data back to the kids data
joindata$bmiz<-scale(joindata$bmi, center=T, scale=T)
joindata$agez<-scale(joindata$age, center=T, scale=T)

As a general rule, I will do a basic fixed-effects ANOVA as a precursor to doing full multi-level models, just to see if there is any variation amongst my higher level units (groups). If I do not see any variation in my higher level units, I generally will not proceed with the process of multi-level modeling.

fit.an<-lm(bmiz~as.factor(cofips), joindata)
anova(fit.an)
## Analysis of Variance Table
## 
## Response: bmiz
##                       Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(cofips)    223   2929 13.1330   13.29 < 2.2e-16 ***
## Residuals         229431 226725  0.9882                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Which shows significant variation in average BMI across counties.

As a note, if my outcome is binomial or poisson, I would do glm(y~as.factor(cofips),family=binomial ,brfss_11) instead of lm() as this would give you the correct test for the other distributions.

Fitting the basic multi-level model

To specify a random intercept model for counties, we add, a model term that is (1|HIGHER LEVEL VARIABLE), which tells R to fit only a random intercept for each county, in our case it will be (1|cofips)

fit.mix<-lmer(bmiz~agez+lths+coll+black+hispanic+other+(1|cofips), data=joindata)
#do a test for the random effect
rand(fit.mix)
## Analysis of Random effects Table:
##        Chi.sq Chi.DF p.value    
## cofips   1603      1  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
display(fit.mix, detail=T)
## lme4::lmer(formula = bmiz ~ agez + lths + coll + black + hispanic + 
##     other + (1 | cofips), data = joindata)
##             coef.est coef.se t value
## (Intercept)   0.03     0.01    3.73 
## agez          0.01     0.00    3.86 
## lths          0.06     0.01    6.86 
## coll         -0.11     0.00  -21.61 
## black1        0.41     0.01   50.99 
## hispanic1     0.17     0.01   19.88 
## other1       -0.03     0.01   -3.32 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.10    
##  Residual             0.99    
## ---
## number of obs: 226744, groups: cofips, 224
## AIC = 637546, DIC = 637411.7
## deviance = 637470.0

So we see that our standard deviation at the county level is .1, and the standard deviation at the individual level (residual standard deviation) is .99. Square these to get variances, of course. Our fixed effects are interpreted as normal, older people have higher BMI’s than younger people, those with less than High School education have higher BMI’s, while people with college education have lower BMI’s than those with a high school education only. In terms of race/ethnicity, African Americans and Hispanics have higher average BMI’s, compared to Non-Hispanic Whites, while those of some other race/ethnicity have lower average BMI’s compared to whites. See, just like ordinary regression.

Some may be interested in getting the intra-class correlation coefficient. While I don’t usually pay attention to this, here it is:

#it can be a little hairy to get it, but it can be done using various part of VarCorr()
ICC1<-VarCorr(fit.mix)$cofips[1]/( VarCorr(fit.mix)$cofips[1]+attr(VarCorr(fit.mix), "sc"))
ICC1
## [1] 0.0100321

So about 1% of the variance in BMI is due to difference between counties. That’s not much, but according to our random effect testing, it’s not, statistically speaking, 0.

Sometimes, to gain the appreciation, we may want to plot the random effects, I first show the fixed effects, then the random effects:

#I need to rescale the estimates to the BMI scale from the z-score scale
meanbmi<-mean(joindata$bmi, na.rm=T)
sdbmi<-sd(joindata$bmi, na.rm=T)


fixcoef<-fit.an$coefficients[1]+fit.an$coefficients[-1]
fixcoef<-(fixcoef*sdbmi)+meanbmi

plot(NULL,ylim=c(25, 30), xlim=c(0,1), ylab="Intercept", xlab="Age") # get the ylim from a summary of rancoefs1
title(main="Fixed Effect Model")

for (i in 1: length(fixcoef)[1]){
  #I plug in a beta, here it's the effect of age from fit.mix
  abline(a=fixcoef[i], b=.01,  lwd=1.5, col="green")
}

#It may be easier to visualize the random intercepts by plotting them
rancoefs1<-ranef(fit.mix)$cofips+fixef(fit.mix)[1]
rancoefs1<-(rancoefs1*sdbmi)+meanbmi
summary(rancoefs1)
##   (Intercept)   
##  Min.   :25.77  
##  1st Qu.:27.20  
##  Median :27.54  
##  Mean   :27.54  
##  3rd Qu.:27.94  
##  Max.   :29.12
plot(NULL,ylim=c(25,30), xlim=c(0,1), ylab="Intercept", xlab="Age") # get the ylim from a summary of rancoefs1
title(main="Random Intercept Models")

for (i in 1: length(rancoefs1[,1])){
  #I plug in a beta, here it's the effect of age from fit.mix
  abline(a=rancoefs1[i,1], b=.01,  lwd=1.5, col="maroon")
}

So what’s the difference? It may be informative to plot the estimated BMI’s for each county from the OLS and multilevel models. This section illustrates the effects of “pooling” as Gelman & Hill ch 12.

#these models are good for estimating group means better than traditional methods
#this follows the examples in chapter 12 of Gelman and Hill, I stole the code directly from them.

#complete pooling, this model fits the grand mean ONLY
fit.cp<-lm(bmi~1, joindata)
display(fit.cp)
## lm(formula = bmi ~ 1, data = joindata)
##             coef.est coef.se
## (Intercept) 27.37     0.01  
## ---
## n = 229655, k = 1
## residual sd = 5.93, R-Squared = 0.00
#No pooling i.e. fixed effects regression, this model fits separate means for each county using OLS
lm.unpooled<-lm(bmi~factor(cofips)-1, joindata)


#partial pooling, this model fits the population mean and county deviations using multilevel models
fit0<-lmer(bmi~1+(1|cofips), joindata)

#partial pooling with covariate
fit0.1<-lmer(bmi~povz+unempz+(1|cofips), joindata)


#Plot the means of the counties
J<-length(unique(joindata$cofips))
ns<-as.numeric(table(brfss_11$cofips))
sample.size <- as.numeric(table(brfss_11$cofips))
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))

par (mar=c(5,5,4,2)+.1)
plot (sample.size.jittered, coef(lm.unpooled), cex.lab=1.2, cex.axis=1.2,
      xlab="sample size in county j", ylab=expression (paste
                                                       ("est. intercept, ", alpha[j], "   (no pooling)")),
      pch=20, log="x", ylim=c(25, 30), yaxt="n", xaxt="n")
axis (1, quantile(ns), cex.axis=1.1)
axis (2, seq(25, 30), cex.axis=1.1)
for (j in 1:J){
  lines (rep(sample.size.jittered[j],2),
         coef(lm.unpooled)[j] + c(-1,1)*se.coef(lm.unpooled)[j], lwd=.5)
}
abline (coef(fit.cp)[1], 0, lwd=.5)
title(main="Estimates of County Means from the Fixed Effect Model")

#plot MLM estimates of county means + se's
par (mar=c(5,5,4,2)+.1)
a.hat.M1 <- coef(fit0)$cofips[,1]
a.se.M1 <- se.coef(fit0)$cofips

plot (as.numeric(ns), t(a.hat.M1), cex.lab=1.2, cex.axis=1.1,
      xlab="sample size in county j",
      ylab=expression (paste("est. intercept, ", alpha[j], "(multilevel model)")),
      pch=20, log="x", ylim=c(25, 30), yaxt="n", xaxt="n")
axis (1, quantile(ns), cex.axis=1.1)
axis (2, seq(25,30), cex.axis=1.1)
for (j in 1:length(unique(joindata$cofips))){
  lines (rep(as.numeric(ns)[j],2),
         as.vector(a.hat.M1[j]) + c(-1,1)*a.se.M1[j], lwd=.5, col="gray10")
}
abline (coef(fit.cp)[1], 0, lwd=.5)
title(main="Estimates of County Means from the MLM")

#plot MLM estimates of county means + se's, model with county covariates
par (mar=c(5,5,4,2)+.1)
a.hat.M2 <- coef(fit0.1)$cofips[,1]
a.se.M2 <- se.coef(fit0.1)$cofips

plot (as.numeric(ns), t(a.hat.M2), cex.lab=1.2, cex.axis=1.1,
      xlab="sample size in county j",
      ylab=expression (paste("est. intercept, ", alpha[j], "(multilevel model with covariates)")),
      pch=20, log="x", ylim=c(25, 30), yaxt="n", xaxt="n")
axis (1, quantile(ns), cex.axis=1.1)
axis (2, seq(25,30), cex.axis=1.1)
for (j in 1:length(unique(joindata$cofips))){
  lines (rep(as.numeric(ns)[j],2),
         as.vector(a.hat.M2[j]) + c(-1,1)*a.se.M2[j], lwd=.5, col="gray10")
}
abline (coef(fit.cp)[1], 0, lwd=.5)
title(main="Estimates of County Means from the MLM with county predictors")

#This plot shows that as the n_j goes down, the standard error of the estimate increases
qplot(y=se.coef(lm.unpooled), x=coef(lm.unpooled), size=sqrt(ns))

#again, we see the relationship between variance and n_j
qplot(y=as.numeric(a.se.M1),x=se.coef(lm.unpooled),color=sqrt(ns))

hist(se.coef(lm.unpooled), col="red")
hist(a.se.M1, add=T, col="white")

Multilevel model with group-level predictor

Now, I fit the same model above, but this time I include a predictor at the county level, the poverty rate.

#Now I estimate the multilevel model including the effects for the
#county level variables
fit.mix2<-lmer(bmiz~agez+lths+coll+black+hispanic+other+povz+(1|cofips), data=joindata)
display(fit.mix2, detail=T)
## lme4::lmer(formula = bmiz ~ agez + lths + coll + black + hispanic + 
##     other + povz + (1 | cofips), data = joindata)
##             coef.est coef.se t value
## (Intercept)   0.04     0.01    4.20 
## agez          0.01     0.00    3.84 
## lths          0.06     0.01    6.84 
## coll         -0.11     0.00  -21.60 
## black1        0.41     0.01   50.75 
## hispanic1     0.17     0.01   19.80 
## other1       -0.03     0.01   -3.35 
## povz          0.02     0.01    1.90 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.10    
##  Residual             0.99    
## ---
## number of obs: 226744, groups: cofips, 224
## AIC = 637552, DIC = 637400.8
## deviance = 637466.4
rand(fit.mix2)
## Analysis of Random effects Table:
##        Chi.sq Chi.DF p.value    
## cofips   1586      1  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ICC2<-VarCorr(fit.mix2)$cofips[1]/( VarCorr(fit.mix2)$cofips[1]+attr(VarCorr(fit.mix2), "sc"))
ICC2
## [1] 0.009905632
#compare the random intercept ad multilevel model with a LRT
anova(fit.mix, fit.mix2)
## refitting model(s) with ML (instead of REML)
## Data: joindata
## Models:
## object: bmiz ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
## ..1: bmiz ~ agez + lths + coll + black + hispanic + other + povz + 
## ..1:     (1 | cofips)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)  
## object  9 637488 637581 -318735   637470                           
## ..1    10 637486 637590 -318733   637466 3.6169      1     0.0572 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#There is slight improvement, p=.0572 becuase the poverty variable was almost significant, t=1.90

The ICC goes down, which points to the fact that we are controlling away some of the between-county variance by including the county level predictor.

Random slope models

This effectively allows one or more of the individual level variables to have different effects in each of the groups.

#To do a random slope model, I do:
fit.mix3<-lmer(bmiz~agez+lths+coll+black+hispanic+other+povz+(black|cofips), joindata, REML=F)
display(fit.mix3, digits=3)
## lme4::lmer(formula = bmiz ~ agez + lths + coll + black + hispanic + 
##     other + povz + (black | cofips), data = joindata, REML = F)
##             coef.est coef.se
## (Intercept)  0.039    0.009 
## agez         0.008    0.002 
## lths         0.061    0.009 
## coll        -0.104    0.005 
## black1       0.372    0.013 
## hispanic1    0.174    0.009 
## other1      -0.029    0.009 
## povz         0.025    0.010 
## 
## Error terms:
##  Groups   Name        Std.Dev. Corr   
##  cofips   (Intercept) 0.104           
##           black1      0.110    -0.448 
##  Residual             0.985           
## ---
## number of obs: 226744, groups: cofips, 224
## AIC = 637387, DIC = 637363.1
## deviance = 637363.1
rand(fit.mix3)
## Warning in totalAnovaRandLsmeans(model = model, isRand = TRUE, reduce.random = FALSE): 
##  model has been refitted with REML=TRUE
## Analysis of Random effects Table:
##              Chi.sq Chi.DF p.value    
## black:cofips    104      2  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#compare the models with a LRT
anova(fit.mix2, fit.mix3)
## refitting model(s) with ML (instead of REML)
## Data: joindata
## Models:
## object: bmiz ~ agez + lths + coll + black + hispanic + other + povz + 
## object:     (1 | cofips)
## ..1: bmiz ~ agez + lths + coll + black + hispanic + other + povz + 
## ..1:     (black | cofips)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## object 10 637486 637590 -318733   637466                             
## ..1    12 637387 637511 -318682   637363 103.31      2  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#the random slope model fits better

#plot random slopes and intercepts
rancoefs2<-ranef(fit.mix3)

plot(NULL, ylim=c(-.5, .5), xlim=c(-1,1),ylab="Intercept", xlab="Age")
title (main="Random Slope and Intercept Model")

for (i in 1: dim(rancoefs2$cofips)[1]){
  
  abline(a=fixef(fit.mix3)[1]+rancoefs2$cofips[[1]][i], b=fixef(fit.mix3)[2]+rancoefs2$cofips[[2]][i], col=2)
}

Cross level interaction effect

Here, I show the model for a cross-level interaction. This model fits an interaction term between (at least) one individual level variable and a group level variable. This allows you to ask very informative questions regarding individuals within specific contexts.

#Cross-level interaction model:
fit.mix4<-lmer(bmiz~agez+lths+coll+black+hispanic+other+black*povz+hispanic*povz+other*povz+(1|cofips), joindata, REML=F)
display(fit.mix4, digits=3)
## lme4::lmer(formula = bmiz ~ agez + lths + coll + black + hispanic + 
##     other + black * povz + hispanic * povz + other * povz + (1 | 
##     cofips), data = joindata, REML = F)
##                coef.est coef.se
## (Intercept)     0.028    0.009 
## agez            0.008    0.002 
## lths            0.060    0.009 
## coll           -0.105    0.005 
## black1          0.415    0.008 
## hispanic1       0.192    0.009 
## other1          0.016    0.011 
## povz           -0.002    0.011 
## black1:povz     0.075    0.011 
## hispanic1:povz  0.066    0.013 
## other1:povz     0.108    0.013 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.099   
##  Residual             0.985   
## ---
## number of obs: 226744, groups: cofips, 224
## AIC = 637378, DIC = 637352.2
## deviance = 637352.2
#This basically says that racial/ethnic minorities in counties with higher than average poverty rates (povz==1 vs povz==0) have higher BMI's

rand(fit.mix4)
## Warning in totalAnovaRandLsmeans(model = model, isRand = TRUE, reduce.random = FALSE): 
##  model has been refitted with REML=TRUE
## Analysis of Random effects Table:
##        Chi.sq Chi.DF p.value    
## cofips   1607      1  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#compare the models with a LRT
anova(fit.mix3, fit.mix4)
## Data: joindata
## Models:
## object: bmiz ~ agez + lths + coll + black + hispanic + other + povz + 
## object:     (black | cofips)
## ..1: bmiz ~ agez + lths + coll + black + hispanic + other + black * 
## ..1:     povz + hispanic * povz + other * povz + (1 | cofips)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## object 12 637387 637511 -318682   637363                             
## ..1    13 637378 637513 -318676   637352 10.873      1  0.0009756 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1