Week 5: Basic Hierarchical Models - Cross level interactions & Contextual Effects

In this example, I will show how to fit a multi-level model that includes a predictor at the macro level. I will also consider the cross-level interaction effect, where we are interested in contextualizing the effect of an individual level predictor within the context of the macro level predictor. The data we use this time merges data from the 2011 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART county data and the 2010 American Community Survey 5-year estimates at the county level. Our outcome of interest is a person’s obesity status, measured using the BRFSS’s BMI variable, and using the cutoff rule of obese weight is a BMI greater than 30.

Again, we are assuming a set of propositions where we have an individual level outcome, and predictor variables at both the micro and macro levels. This can be shown in several different ways.

test

The first case is a macro to micro proposition, which may be exemplified by a statement such as: “Individuals in areas with high rates of poverty are more likely to be obese”.

Whereas the second frame illustrates a more specific special case, where there is a macro level effect, net of the individual level predictor, and may be stated “Individuals who are racial/ethnic minorities are more likely to be obese, compared to Non-Hispanic whites, and living in areas of high poverty also increase the risk of obesity”.

The last panel illustrates what is known as a cross level interaction, or a macro-micro interaction. This is where the relationship between x and y is dependent on Z. This leads to the statement “Individuals who are racial/ethnic minorities, living in areas with high poverty rates are at higher risk of obesity”.

First we load our data and libraries

library(lme4)

## Loading required package: Matrix

library(arm)

## Loading required package: MASS
## 
## arm (Version 1.8-6, built: 2015-7-7)
## 
## Working directory is /Users/ozd504/Google Drive/dem7903_App_Hier/code/code14

library(car)

## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:arm':
## 
##     logit

library(MASS)


#load brfss
load("~/Google Drive/dem7903_App_Hier/data/brfss_11.Rdata")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)

brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")

#insurance
brfss_11$ins<-ifelse(brfss_11$hlthpln1==1,1,0)
#smoking currently
brfss_11$smoke<-recode(brfss_11$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#low physical activity
brfss_11$lowact<-recode(brfss_11$pacat, recodes="1:2=0; 3:4=1; else=NA")
#high blood pressure
brfss_11$hbp<-recode(brfss_11$bphigh4, recodes="1=1; 2:4=0; else=NA")
#high cholesterol
brfss_11$hc<-recode(brfss_11$toldhi2, recodes="1=1; 2=0; else=NA")
#bmi
brfss_11$bmi<-ifelse(is.na(brfss_11$bmi5)==T, NA, brfss_11$bmi5/100)
#poor or fair health
brfss_11$badhealth<-ifelse(brfss_11$genhlth %in% c(4,5),1,0)

#race
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=T)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=T)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=T)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=T)

#have a personal doctor?
brfss_11$doc<-recode(brfss_11$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor.result=F)

#needed care in last year but couldn't get it because of cost
brfss_11$medacc<-recode(brfss_11$medcost, recodes="1=1;2=0;else=NA")

#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)

#employment
brfss_11$employ<-recode(brfss_11$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor.result=T)
brfss_11$employ<-relevel(brfss_11$employ, ref='Employed')

#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')

#income
brfss_11$inc<-as.factor(ifelse(brfss_11$incomg==9, NA, brfss_11$incomg))

#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))

Now we will begin fitting the multilevel regression model with the county that the person lives in being the higher level.

Higher level (Macro) variables

As a rule, when using ANY continuous predictor, and Especially continuous predictors at the higher level, you should always standardize (z-score) them. This is done for 2 reasons, first is that it makes the computer happy, meaning that, if the scale of all predictor variables is on a similar scale as the variance components of the model, the algorithms (Laplace approximation) will converge faster. Second is that scaling your higher level variables makes the interpretation of the effect of the higher level variable easier.

You should always standardize your macro level variables prior to merging them back to the individual level data.

To measure macro level variables, I will include some Census variables from the ACS 2010 5 Year estimates load in ACS data from Factfinder. The first set of variables includes information on the economic conditions of the county, specifically poverty and unemployment.

acsecon<-read.csv("~/Google Drive/dem7903_App_Hier/data/aff_download/ACS_10_5YR_DP03_with_ann.csv")
acsecon$povrate<-acsecon[, "HC03_VC156"]
acsecon$unemployed<-acsecon[, "HC03_VC13"]
acsecon$cofips<-substr(acsecon$GEO.id, 10,14)
acsecon<-acsecon[, c("cofips", "povrate", "unemployed")]

joindata<-acsecon
head(acsecon)

##   cofips povrate unemployed
## 1  01001     7.5        6.2
## 2  01003     9.1        6.6
## 3  01005    19.9        9.6
## 4  01007     9.4        9.1
## 5  01009    10.0        7.5
## 6  01011    22.6       11.2

I also have segregation measures at the count level that I made here, I will merge these in too.

load("~/Google Drive/dem7903_App_Hier/data/segregation_county.Rdata")
joindata<-merge(joindata, county_dat, by.x="cofips", by.y="cofips", all.x=T)

joindata$povz<-scale(joindata$povrate, center=T, scale=T)
joindata$unempz<-scale(joindata$unemployed, center=T, scale=T)
joindata$diss.z<-scale(joindata$dissim_wb, center=T, scale=T)
joindata$iso.z<-scale(joindata$isolation_b, center=T, scale=T)
joindata$theil.z<-scale(joindata$TheileH, center=T, scale=T)

head(joindata)

##   cofips povrate unemployed dissim_wb isolation_b interaction_bw
## 1  01001     7.5        6.2 0.2897522  0.27474922      0.6897242
## 2  01003     9.1        6.6 0.3903035  0.18464112      0.7673910
## 3  01005    19.9        9.6 0.1913727  0.50243412      0.4518021
## 4  01007     9.4        9.1 0.4338891  0.33170220      0.6477188
## 5  01009    10.0        7.5 0.3776626  0.02200736      0.9040929
## 6  01011    22.6       11.2 0.2412244  0.71810200      0.2139202
##      TheileH       povz       unempz     diss.z      iso.z    theil.z
## 1 0.08299523 -0.6960015 -0.396270981 -0.1041120  0.7875219  0.1091286
## 2 0.09203860 -0.4069846 -0.277120527  0.4777865  0.2928025  0.2125200
## 3 0.04898714  1.5438794  0.616507876 -0.6734431  2.0375776 -0.2796806
## 4 0.11959518 -0.3527939  0.467569809  0.7300204  1.1002102  0.5275699
## 5 0.05283605 -0.2444126 -0.009032006  0.4046325 -0.6001036 -0.2356765
## 6 0.05627216  2.0315954  1.093109691 -0.3849469  3.2216563 -0.1963920

Now, I merge the data back to the individual level data:

merged<-merge(x=brfss_11, y=joindata, by.x="cofips", by.y="cofips", all.x=T)

## Warning in merge.data.frame(x = brfss_11, y = joindata, by.x = "cofips", :
## column names 'cholchk', 'mrace' are duplicated in the result

merged$bmiz<-scale(merged$bmi, center=T, scale=T)
merged$agez<-scale(merged$age, center=T, scale=T)

Models

Here I fit a binomial model (binary logistic regression) for obesity status (1= obese, 0=not obese) First a simple random intercept model.

I subset the data to only have a few states, this is purely for faster computer time:

#California,Illinois,  Massachusetts, New York, Ohio, Texas
merged<-merged[merged$statefip%in%c("06", "25","17", "36", "39","48"),]

Here I fit the random intercept logistic regression:

Model 1

fit.mix.bin<-glmer(I(bmi>=30)~agez+lths+coll+black+hispanic+other+(1|cofips), family=binomial, data=merged)

display(fit.mix.bin, detail=T)

## glmer(formula = I(bmi >= 30) ~ agez + lths + coll + black + hispanic + 
##     other + (1 | cofips), data = merged, family = binomial)
##             coef.est coef.se z value Pr(>|z|)
## (Intercept)  -1.04     0.04  -27.56    0.00  
## agez         -0.02     0.01   -1.32    0.19  
## lths          0.21     0.04    5.01    0.00  
## coll         -0.26     0.03   -9.89    0.00  
## black1        0.64     0.04   17.37    0.00  
## hispanic1     0.33     0.04    9.24    0.00  
## other1       -0.30     0.05   -5.84    0.00  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.18    
##  Residual             1.00    
## ---
## number of obs: 45954, groups: cofips, 39
## AIC = 50591.7, DIC = 50357.7
## deviance = 50466.7

#confidence intervals for the Odds ratio
exp(confint(fit.mix.bin, method="Wald"))

##                 2.5 %    97.5 %
## .sig01             NA        NA
## (Intercept) 0.3271878 0.3795132
## agez        0.9626842 1.0073950
## lths        1.1328775 1.3301492
## coll        0.7343799 0.8133288
## black1      1.7686955 2.0448012
## hispanic1   1.2980843 1.4939562
## other1      0.6655864 0.8167191

Model 2

Here is a model that only considers the macro level variable on the outcome:

fit.mix.bin.mac<-glmer(I(bmi>=30)~povz+iso.z+(1|cofips), family=binomial, data=merged)
display(fit.mix.bin.mac)

## glmer(formula = I(bmi >= 30) ~ povz + iso.z + (1 | cofips), data = merged, 
##     family = binomial)
##             coef.est coef.se
## (Intercept) -1.06     0.05  
## povz         0.14     0.06  
## iso.z       -0.01     0.03  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.18    
##  Residual             1.00    
## ---
## number of obs: 46641, groups: cofips, 39
## AIC = 52079.6, DIC = 51855.2
## deviance = 51963.4

So, in counties that have higher poverty rates, the risk of obesity is higher, while there is not effect of living in more highly segregated counties.

Model 3

Then I do a multi-level model where I include the poverty rate and the segregation index at the county level:

fit.mix.bin.ml<-glmer(I(bmi>=30)~agez+lths+coll+black+hispanic+other+povz+(1|cofips), family=binomial, data=merged)

display(fit.mix.bin.ml, detail=T)

## glmer(formula = I(bmi >= 30) ~ agez + lths + coll + black + hispanic + 
##     other + povz + (1 | cofips), data = merged, family = binomial)
##             coef.est coef.se z value Pr(>|z|)
## (Intercept)  -1.04     0.04  -26.69    0.00  
## agez         -0.02     0.01   -1.32    0.19  
## lths          0.20     0.04    5.00    0.00  
## coll         -0.26     0.03   -9.89    0.00  
## black1        0.64     0.04   17.30    0.00  
## hispanic1     0.33     0.04    9.22    0.00  
## other1       -0.30     0.05   -5.84    0.00  
## povz          0.04     0.05    0.72    0.47  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.18    
##  Residual             1.00    
## ---
## number of obs: 45954, groups: cofips, 39
## AIC = 50593.1, DIC = 50358
## deviance = 50466.6

#confidence intervals for the Odds ratio
exp(confint(fit.mix.bin.ml, method="Wald"))

##                 2.5 %    97.5 %
## .sig01             NA        NA
## (Intercept) 0.3287546 0.3828019
## agez        0.9626947 1.0074044
## lths        1.1327131 1.3299240
## coll        0.7345163 0.8134687
## black1      1.7656573 2.0418205
## hispanic1   1.2971888 1.4929936
## other1      0.6657076 0.8168319
## povz        0.9376814 1.1490977

#Compare this to the first model, to see if the higher level variables contribute to the model
anova(fit.mix.bin, fit.mix.bin.ml)

## Data: merged
## Models:
## fit.mix.bin: I(bmi >= 30) ~ agez + lths + coll + black + hispanic + other + 
## fit.mix.bin:     (1 | cofips)
## fit.mix.bin.ml: I(bmi >= 30) ~ agez + lths + coll + black + hispanic + other + 
## fit.mix.bin.ml:     povz + (1 | cofips)
##                Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)
## fit.mix.bin     8 50592 50662 -25288    50576                         
## fit.mix.bin.ml  9 50593 50672 -25288    50575 0.5163      1     0.4724

These results show, that net of individual level variables, the county level predictors do not tell us anything about obesity risk.

Model 4

Finally, I fit the Cross level interaction model between poverty and racial, using the individual level race variables as my micro level variable of interest. This would answer the question, “Do racial/ethnic minorities living in High poverty areas face added risk of being obese” or “Do racial/ethnic minorities living in highly segregated area face added risk of being obese”

To do this in your model, you simply include a term of individualvar*highervar term:

###Poverty*Race
fit.mix.bin.cl<-glmer(I(bmi>=30)~agez+lths+coll+(black+hispanic+other)*povz+ (1|cofips), family=binomial, data=merged)
display(fit.mix.bin.cl, detail=T)

## glmer(formula = I(bmi >= 30) ~ agez + lths + coll + (black + 
##     hispanic + other) * povz + (1 | cofips), data = merged, family = binomial)
##                coef.est coef.se z value Pr(>|z|)
## (Intercept)     -1.05     0.04  -25.82    0.00  
## agez            -0.02     0.01   -1.43    0.15  
## lths             0.20     0.04    5.00    0.00  
## coll            -0.26     0.03   -9.80    0.00  
## black1           0.65     0.04   17.33    0.00  
## hispanic1        0.35     0.04    9.39    0.00  
## other1          -0.24     0.06   -4.17    0.00  
## povz            -0.01     0.06   -0.20    0.84  
## black1:povz      0.26     0.06    4.31    0.00  
## hispanic1:povz   0.04     0.06    0.63    0.53  
## other1:povz      0.23     0.08    2.80    0.01  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.19    
##  Residual             1.00    
## ---
## number of obs: 45954, groups: cofips, 39
## AIC = 50574.9, DIC = 50323.3
## deviance = 50437.1

exp(confint(fit.mix.bin.cl, method="Wald"))

##                    2.5 %    97.5 %
## .sig01                NA        NA
## (Intercept)    0.3225535 0.3783966
## agez           0.9615167 1.0062192
## lths           1.1324700 1.3297655
## coll           0.7360792 0.8152311
## black1         1.7777941 2.0588243
## hispanic1      1.3143659 1.5181971
## other1         0.7062617 0.8820736
## povz           0.8849521 1.1044400
## black1:povz    1.1492153 1.4499359
## hispanic1:povz 0.9273142 1.1575158
## other1:povz    1.0707399 1.4757557

anova(fit.mix.bin.ml, fit.mix.bin.cl)

## Data: merged
## Models:
## fit.mix.bin.ml: I(bmi >= 30) ~ agez + lths + coll + black + hispanic + other + 
## fit.mix.bin.ml:     povz + (1 | cofips)
## fit.mix.bin.cl: I(bmi >= 30) ~ agez + lths + coll + (black + hispanic + other) * 
## fit.mix.bin.cl:     povz + (1 | cofips)
##                Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## fit.mix.bin.ml  9 50593 50672 -25288    50575                             
## fit.mix.bin.cl 12 50575 50680 -25276    50551 24.233      3  2.233e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

###Segregation * Race
fit.mix.bin.cl2<-glmer(I(bmi>=30)~agez+lths+coll+(black+hispanic+other)*iso.z+ (1|cofips), family=binomial, data=merged)
display(fit.mix.bin.cl2, detail=T)

## glmer(formula = I(bmi >= 30) ~ agez + lths + coll + (black + 
##     hispanic + other) * iso.z + (1 | cofips), data = merged, 
##     family = binomial)
##                 coef.est coef.se z value Pr(>|z|)
## (Intercept)      -1.02     0.05  -22.21    0.00  
## agez             -0.02     0.01   -1.43    0.15  
## lths              0.20     0.04    4.96    0.00  
## coll             -0.26     0.03   -9.87    0.00  
## black1            0.50     0.06    8.77    0.00  
## hispanic1         0.34     0.04    8.13    0.00  
## other1           -0.29     0.06   -4.70    0.00  
## iso.z            -0.02     0.03   -0.83    0.41  
## black1:iso.z      0.10     0.03    3.12    0.00  
## hispanic1:iso.z  -0.01     0.03   -0.41    0.68  
## other1:iso.z     -0.02     0.05   -0.49    0.63  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.18    
##  Residual             1.00    
## ---
## number of obs: 45954, groups: cofips, 39
## AIC = 50588.2, DIC = 50344.1
## deviance = 50454.2

exp(confint(fit.mix.bin.cl2, method="Wald"))

##                     2.5 %    97.5 %
## .sig01                 NA        NA
## (Intercept)     0.3281038 0.3930974
## agez            0.9615012 1.0061976
## lths            1.1307404 1.3277093
## coll            0.7348100 0.8138271
## black1          1.4791022 1.8526811
## hispanic1       1.2927801 1.5218218
## other1          0.6641725 0.8449731
## iso.z           0.9212871 1.0337253
## black1:iso.z    1.0379463 1.1765671
## hispanic1:iso.z 0.9240040 1.0531771
## other1:iso.z    0.8863387 1.0752728

anova(fit.mix.bin.ml, fit.mix.bin.cl2)

## Data: merged
## Models:
## fit.mix.bin.ml: I(bmi >= 30) ~ agez + lths + coll + black + hispanic + other + 
## fit.mix.bin.ml:     povz + (1 | cofips)
## fit.mix.bin.cl2: I(bmi >= 30) ~ agez + lths + coll + (black + hispanic + other) * 
## fit.mix.bin.cl2:     iso.z + (1 | cofips)
##                 Df   AIC   BIC logLik deviance Chisq Chi Df Pr(>Chisq)  
## fit.mix.bin.ml   9 50593 50672 -25288    50575                          
## fit.mix.bin.cl2 12 50588 50693 -25282    50564 10.94      3    0.01206 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The poverty interaction model shows that, individuals who are black and of other race/ethnicity, who are living in a higher than average poverty county, are more likely to be obese, compared to whites in an average poverty county. This model shows improvement compared to the multilevel model.

The segregation interaction model shows that, only for blacks, if they are living in a county that is highly segregated, this increases their risk of being obese, compared to whites in a county of average segregation. This model shows improvement compared to the multilevel model.

Fitted Values

We can get fitted values out of a model by using the predict() method for (lg)mer model fits. Here, I construct the fixed effect matrix for a few people from Oakland (Alameda County, CA), Los Angeles CA and Bexar County TX.

#Get my different types of people from the fixed effects
newdat1<-expand.grid(agez=0, lths=0, coll=0, 
                     black=levels(merged$black),
                     hispanic=levels(merged$hispanic),
                     other=levels(merged$other))
#remove some nonsensicals, people who are multiple ethnicities, etc.
newdat1<-newdat1[c(-4, -6:-8),]

#I need 3 counties worth of people
newdat1<-rbind(newdat1, newdat1, newdat1)
#Put in the county identifier
newdat1$cofips<-c(rep("48029", 4), rep("06001", 4), rep("06037", 4))
#get the poverty z score for these 3 counties
povz <- joindata[joindata$cofips %in%c("48029", "06001", "06037") , "povz"]
newdat1$povz<-c(rep(povz[1], 4), rep(povz[2], 4), rep(povz[3], 4))

#Here's our three counties of example cases:
newdat1

##    agez lths coll black hispanic other cofips       povz
## 1     0    0    0     0        0     0  48029 -0.6237473
## 2     0    0    0     1        0     0  48029 -0.6237473
## 3     0    0    0     0        1     0  48029 -0.6237473
## 5     0    0    0     0        0     1  48029 -0.6237473
## 11    0    0    0     0        0     0  06001  0.2252398
## 21    0    0    0     1        0     0  06001  0.2252398
## 31    0    0    0     0        1     0  06001  0.2252398
## 51    0    0    0     0        0     1  06001  0.2252398
## 12    0    0    0     0        0     0  06037  0.3336212
## 22    0    0    0     1        0     0  06037  0.3336212
## 32    0    0    0     0        1     0  06037  0.3336212
## 52    0    0    0     0        0     1  06037  0.3336212

newdat1$fitted<-predict(fit.mix.bin.cl,newdata=newdat1, type="response")

newdat1

##    agez lths coll black hispanic other cofips       povz    fitted
## 1     0    0    0     0        0     0  48029 -0.6237473 0.3067594
## 2     0    0    0     1        0     0  48029 -0.6237473 0.4192625
## 3     0    0    0     0        1     0  48029 -0.6237473 0.3794321
## 5     0    0    0     0        0     1  48029 -0.6237473 0.2324325
## 11    0    0    0     0        0     0  06001  0.2252398 0.2250427
## 21    0    0    0     1        0     0  06001  0.2252398 0.3704562
## 31    0    0    0     0        1     0  06001  0.2252398 0.2925350
## 51    0    0    0     0        0     1  06001  0.2252398 0.1944083
## 12    0    0    0     0        0     0  06037  0.3336212 0.2159302
## 22    0    0    0     1        0     0  06037  0.3336212 0.3645626
## 32    0    0    0     0        1     0  06037  0.3336212 0.2824601
## 52    0    0    0     0        0     1  06037  0.3336212 0.1900249