Here, we will talk about a few more miscellaneous topics: GLMM’s and Cross level interactions The data I will use here is from the Behavioral Risk Factor Surveillance System (BRFSS) SMART county data First we load our data
library(lme4)
## Loading required package: Matrix
## Loading required package: Rcpp
library(arm)
## Loading required package: MASS
##
## arm (Version 1.7-05, built: 2014-8-1)
##
## Working directory is /Users/ozd504/Google Drive/dem7903_App_Hier/code/code14
library(car)
##
## Attaching package: 'car'
##
## The following object is masked from 'package:arm':
##
## logit
library(MASS)
library(ordinal)
##
## Attaching package: 'ordinal'
##
## The following object is masked from 'package:arm':
##
## ranef
##
## The following objects are masked from 'package:lme4':
##
## ranef, VarCorr
#load brfss
load("~/Google Drive/dem7903_App_Hier/data/brfss_11.Rdata")
nams<-names(brfss_11)
newnames<-gsub("_", "", nams)
names(brfss_11)<-tolower(newnames)
brfss_11$statefip<-sprintf("%02d", brfss_11$state )
brfss_11$cofip<-sprintf("%03d", brfss_11$cnty )
brfss_11$cofips<-paste(brfss_11$statefip, brfss_11$cofip, sep="")
#Outcome is: Now thinking about your physical health, which includes physical illness and injury, for how many days during the past 30 days was your physical health not good?
brfss_11$healthydays<-ifelse(brfss_11$physhlth %in%c(77,99), NA,ifelse(brfss_11$physhlth==88,0, brfss_11$physhlth))
#just for brevity, I just select TX respondents with non missing weights
brfss_11<-brfss_11[is.na(brfss_11$cntywt)==F,]
#insurance
brfss_11$ins<-ifelse(brfss_11$hlthpln1==1,1,0)
#smoking currently
brfss_11$smoke<-recode(brfss_11$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#low physical activity
brfss_11$lowact<-recode(brfss_11$pacat, recodes="1:2=0; 3:4=1; else=NA")
#high blood pressure
brfss_11$hbp<-recode(brfss_11$bphigh4, recodes="1=1; 2:4=0; else=NA")
#high cholesterol
brfss_11$hc<-recode(brfss_11$toldhi2, recodes="1=1; 2=0; else=NA")
#bmi
brfss_11$bmi<-ifelse(is.na(brfss_11$bmi5)==T, NA, brfss_11$bmi5/100)
#poor or fair health
brfss_11$badhealth<-ifelse(brfss_11$genhlth %in% c(4,5),1,0)
#race
brfss_11$black<-recode(brfss_11$racegr2, recodes="2=1; 9=NA; else=0", as.factor.result=T)
brfss_11$white<-recode(brfss_11$racegr2, recodes="1=1; 9=NA; else=0", as.factor.result=T)
brfss_11$other<-recode(brfss_11$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor.result=T)
brfss_11$hispanic<-recode(brfss_11$racegr2, recodes="5=1; 9=NA; else=0", as.factor.result=T)
#have a personal doctor?
brfss_11$doc<-recode(brfss_11$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor.result=F)
#needed care in last year but couldn't get it because of cost
brfss_11$medacc<-recode(brfss_11$medcost, recodes="1=1;2=0;else=NA")
#education level
brfss_11$lths<-recode(brfss_11$educa, recodes="1:3=1;9=NA; else=0", as.factor.result=F)
brfss_11$coll<-recode(brfss_11$educa, recodes="5:6=1;9=NA; else=0", as.factor.result=F)
#employment
brfss_11$employ<-recode(brfss_11$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor.result=T)
brfss_11$employ<-relevel(brfss_11$employ, ref='Employed')
#marital status
brfss_11$marst<-recode(brfss_11$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor.result=T)
brfss_11$marst<-relevel(brfss_11$marst, ref='married')
#income
brfss_11$inc<-as.factor(ifelse(brfss_11$incomg==9, NA, brfss_11$incomg))
#Age cut into intervals
brfss_11$agec<-cut(brfss_11$age, breaks=c(0,24,39,59,79,99))
Now we will begin fitting the multilevel regression model with the county that the person lives in being the higher level
I will also add in some Census variables from the ACS 2010 5 Year estimates load in ACS data from Factfinder
acsecon<-read.csv("~/Google Drive/dem7903_App_Hier/data/aff_download/ACS_10_5YR_DP03_with_ann.csv")
acsecon$povrate<-acsecon[, "HC03_VC156"]
acsecon$unemployed<-acsecon[, "HC03_VC13"]
acsecon$cofips<-substr(acsecon$GEO.id, 10,14)
acsecon<-acsecon[, c("cofips", "povrate", "unemployed")]
joindata<-acsecon
head(acsecon)
## cofips povrate unemployed
## 1 01001 7.5 6.2
## 2 01003 9.1 6.6
## 3 01005 19.9 9.6
## 4 01007 9.4 9.1
## 5 01009 10.0 7.5
## 6 01011 22.6 11.2
I also have segregation measures at the count level that I made, I will merge these in too As a rule, when including continuous predictors at the higher level, you should always standardize them This is done for 2 reasons, first is that it makes the computer happy, second is that it makes the interpretation of the higher level variable easier.
load("~/Google Drive/dem7903_App_Hier/data/segregation_county.Rdata")
joindata<-merge(joindata, county_dat, by.x="cofips", by.y="cofips", all.x=T)
joindata$povz<-scale(joindata$povrate, center=T, scale=T)
joindata$unempz<-scale(joindata$unemployed, center=T, scale=T)
joindata$diss.z<-scale(joindata$dissim_wb, center=T, scale=T)
joindata$iso.z<-scale(joindata$isolation_b, center=T, scale=T)
joindata$theil.z<-scale(joindata$TheileH, center=T, scale=T)
head(joindata)
## cofips povrate unemployed dissim_wb isolation_b interaction_bw TheileH
## 1 01001 7.5 6.2 0.2898 0.27475 0.6897 0.08300
## 2 01003 9.1 6.6 0.3903 0.18464 0.7674 0.09204
## 3 01005 19.9 9.6 0.1914 0.50243 0.4518 0.04899
## 4 01007 9.4 9.1 0.4339 0.33170 0.6477 0.11960
## 5 01009 10.0 7.5 0.3777 0.02201 0.9041 0.05284
## 6 01011 22.6 11.2 0.2412 0.71810 0.2139 0.05627
## povz unempz diss.z iso.z theil.z
## 1 -0.6960 -0.396271 -0.1041 0.7875 0.1091
## 2 -0.4070 -0.277121 0.4778 0.2928 0.2125
## 3 1.5439 0.616508 -0.6734 2.0376 -0.2797
## 4 -0.3528 0.467570 0.7300 1.1002 0.5276
## 5 -0.2444 -0.009032 0.4046 -0.6001 -0.2357
## 6 2.0316 1.093110 -0.3849 3.2217 -0.1964
#and merge the data back to the kids data
merged<-merge(x=brfss_11, y=joindata, by.x="cofips", by.y="cofips", all.x=T)
## Warning: column names 'cholchk', 'mrace' are duplicated in the result
merged$bmiz<-scale(merged$bmi, center=T, scale=T)
merged$agez<-scale(merged$age, center=T, scale=T)
Here I fit a binomial model (binary logistic regression) for healthcare access (1= has a regular primary care physician, 0=does not) First a simple random intercept model, then a model with a higher level variable, then a cross level interaction model
#################
#### GLMM's #####
#################
#I subset the data to only have states with more than 8 counties sampled
tapply(merged$cofips, merged$state, function(x) length(unique(x))>5)
## 1 2 4 5 6 8 9 10 11 12 13 15
## FALSE FALSE FALSE FALSE TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE
## 16 17 18 19 20 21 22 23 24 25 26 27
## FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE FALSE TRUE
## 29 30 31 32 33 34 35 36 37 38 39 40
## FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE FALSE
## 41 42 44 45 46 48 49 50 53 54 55 56
## FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE FALSE
merged<-merged[merged$statefip%in%c("06","08","23","30", "31", "25","39","45" ,"48","49"),]
fit.mix.bin<-glmer(doc~agez+lths+coll+black+hispanic+other+(1|cofips), family=binomial, data=merged)
summary(fit.mix.bin)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## doc ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
## Data: merged
##
## AIC BIC logLik deviance df.resid
## 69835 69911 -34910 69819 99208
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.748 0.196 0.284 0.405 1.923
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.142 0.377
## Number of obs: 99216, groups: cofips, 91
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.9297 0.0445 43.4 < 2e-16 ***
## agez 0.7563 0.0106 71.7 < 2e-16 ***
## lths -0.4527 0.0356 -12.7 < 2e-16 ***
## coll 0.4002 0.0226 17.7 < 2e-16 ***
## black1 -0.1825 0.0416 -4.4 1.2e-05 ***
## hispanic1 -0.5891 0.0320 -18.4 < 2e-16 ***
## other1 -0.3561 0.0401 -8.9 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) agez lths coll black1 hspnc1
## agez 0.074
## lths -0.184 -0.070
## coll -0.340 0.054 0.385
## black1 -0.097 0.048 -0.039 0.059
## hispanic1 -0.107 0.147 -0.248 0.121 0.164
## other1 -0.065 0.090 -0.042 -0.011 0.101 0.164
#Then I do a multi-level model where I include the poverty rate and the segregation index at the county level
fit.mix.bin.ml<-glmer(doc~agez+lths+coll+black+hispanic+other+povz+(1|cofips), family=binomial, data=merged)
summary(fit.mix.bin.ml)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: doc ~ agez + lths + coll + black + hispanic + other + povz +
## (1 | cofips)
## Data: merged
##
## AIC BIC logLik deviance df.resid
## 69834 69920 -34908 69816 99207
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.759 0.196 0.284 0.405 1.922
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.137 0.371
## Number of obs: 99216, groups: cofips, 91
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.8932 0.0491 38.6 < 2e-16 ***
## agez 0.7565 0.0106 71.7 < 2e-16 ***
## lths -0.4522 0.0356 -12.7 < 2e-16 ***
## coll 0.3996 0.0226 17.7 < 2e-16 ***
## black1 -0.1794 0.0417 -4.3 1.7e-05 ***
## hispanic1 -0.5886 0.0320 -18.4 < 2e-16 ***
## other1 -0.3547 0.0401 -8.9 < 2e-16 ***
## povz -0.1114 0.0669 -1.7 0.096 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) agez lths coll black1 hspnc1 other1
## agez 0.060
## lths -0.170 -0.070
## coll -0.302 0.054 0.385
## black1 -0.108 0.049 -0.038 0.058
## hispanic1 -0.102 0.147 -0.248 0.121 0.164
## other1 -0.068 0.090 -0.042 -0.011 0.101 0.164
## povz 0.446 -0.016 -0.009 0.015 -0.044 -0.012 -0.020
anova(fit.mix.bin, fit.mix.bin.ml)
## Data: merged
## Models:
## fit.mix.bin: doc ~ agez + lths + coll + black + hispanic + other + (1 | cofips)
## fit.mix.bin.ml: doc ~ agez + lths + coll + black + hispanic + other + povz +
## fit.mix.bin.ml: (1 | cofips)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.mix.bin 8 69835 69911 -34910 69819
## fit.mix.bin.ml 9 69834 69920 -34908 69816 2.72 1 0.099 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Little evidence of poverty effect
#Cross level interaction model between poverty and race and segregation and race
#You simply include a term of individualvar*highervar
fit.mix.bin.cl<-glmer(doc~agez+lths+coll+(black+hispanic+other)*povz+(1|cofips), family=binomial, data=merged)
summary(fit.mix.bin.cl)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: doc ~ agez + lths + coll + (black + hispanic + other) * povz +
## (1 | cofips)
## Data: merged
##
## AIC BIC logLik deviance df.resid
## 69835 69949 -34905 69811 99204
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -11.762 0.196 0.284 0.404 1.908
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.138 0.372
## Number of obs: 99216, groups: cofips, 91
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.8953 0.0494 38.3 < 2e-16 ***
## agez 0.7565 0.0106 71.7 < 2e-16 ***
## lths -0.4533 0.0356 -12.7 < 2e-16 ***
## coll 0.4001 0.0226 17.7 < 2e-16 ***
## black1 -0.1780 0.0424 -4.2 2.7e-05 ***
## hispanic1 -0.5676 0.0357 -15.9 < 2e-16 ***
## other1 -0.3833 0.0440 -8.7 < 2e-16 ***
## povz -0.1063 0.0683 -1.6 0.12
## black1:povz -0.0114 0.0721 -0.2 0.87
## hispanic1:povz 0.0792 0.0550 1.4 0.15
## other1:povz -0.1010 0.0615 -1.6 0.10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) agez lths coll black1 hspnc1 other1 povz blck1:
## agez 0.059
## lths -0.166 -0.070
## coll -0.300 0.054 0.384
## black1 -0.123 0.049 -0.041 0.059
## hispanic1 -0.120 0.137 -0.238 0.112 0.192
## other1 -0.093 0.084 -0.045 -0.012 0.116 0.180
## povz 0.456 -0.017 -0.004 0.013 -0.074 -0.064 -0.076
## black1:povz -0.076 -0.003 -0.001 0.009 0.167 0.083 0.047 -0.125
## hispnc1:pvz -0.060 0.010 -0.036 0.009 0.098 0.445 0.082 -0.111 0.136
## other1:povz -0.071 0.005 -0.015 -0.005 0.054 0.086 0.407 -0.130 0.100
## hspn1:
## agez
## lths
## coll
## black1
## hispanic1
## other1
## povz
## black1:povz
## hispnc1:pvz
## other1:povz 0.144
anova(fit.mix.bin.ml, fit.mix.bin.cl)
## Data: merged
## Models:
## fit.mix.bin.ml: doc ~ agez + lths + coll + black + hispanic + other + povz +
## fit.mix.bin.ml: (1 | cofips)
## fit.mix.bin.cl: doc ~ agez + lths + coll + (black + hispanic + other) * povz +
## fit.mix.bin.cl: (1 | cofips)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.mix.bin.ml 9 69834 69920 -34908 69816
## fit.mix.bin.cl 12 69835 69949 -34905 69811 5.6 3 0.13
#Cross level interaction is not significant
Here is an example of using a Poisson outcome, number of healthy days reported in the last month
fit.pois<-glmer(healthydays~(black+hispanic+other)+lths+coll+agez+badhealth+(1|cofips),family=poisson, data=merged)
summary(fit.pois)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: poisson ( log )
## Formula: healthydays ~ (black + hispanic + other) + lths + coll + agez +
## badhealth + (1 | cofips)
## Data: merged
##
## AIC BIC logLik deviance df.resid
## 940865 940950 -470423 940847 97370
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.413 -1.478 -1.374 -0.041 25.571
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.00962 0.0981
## Number of obs: 97379, groups: cofips, 91
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.76160 0.01095 70 < 2e-16 ***
## black1 -0.09628 0.00686 -14 < 2e-16 ***
## hispanic1 -0.15690 0.00621 -25 < 2e-16 ***
## other1 0.02596 0.00731 4 0.00038 ***
## lths 0.01640 0.00547 3 0.00273 **
## coll -0.05645 0.00365 -15 < 2e-16 ***
## agez 0.08302 0.00174 48 < 2e-16 ***
## badhealth 1.90296 0.00342 557 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) black1 hspnc1 other1 lths coll agez
## black1 -0.047
## hispanic1 -0.050 0.144
## other1 -0.035 0.089 0.130
## lths -0.098 -0.049 -0.253 -0.051
## coll -0.240 0.037 0.089 -0.020 0.365
## agez -0.031 0.084 0.200 0.116 -0.033 0.057
## badhealth -0.186 -0.069 -0.082 -0.050 -0.076 0.145 -0.186
Perhaps a negative binomial model would be more appropriate, as our outcome is very overdispersed:
fit.nb<-glmer.nb(healthydays~(black+hispanic+other)+lths+coll+agez+badhealth+(1|cofips), data=merged)
summary(fit.nb)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: Negative Binomial(0.1739) ( log )
## Formula: healthydays ~ (black + hispanic + other) + lths + coll + agez +
## badhealth + (1 | cofips)
## Data: ..2
##
## AIC BIC logLik deviance df.resid
## 355794 355889 -177887 355774 97369
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -0.416 -0.402 -0.399 -0.003 8.639
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.00738 0.0859
## Residual 0.99508 0.9975
## Number of obs: 97379, groups: cofips, 91
##
## Fixed effects:
## Estimate Std. Error t value Pr(>|z|)
## (Intercept) 0.79309 0.01938 40.9 <2e-16 ***
## black1 -0.01526 0.03512 -0.4 0.6639
## hispanic1 -0.00451 0.03107 -0.1 0.8845
## other1 0.07784 0.03727 2.1 0.0367 *
## lths 0.09651 0.03340 2.9 0.0039 **
## coll -0.15035 0.01853 -8.1 5e-16 ***
## agez 0.11697 0.00832 14.1 <2e-16 ***
## badhealth 1.87339 0.02136 87.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) black1 hspnc1 other1 lths coll agez
## black1 -0.139
## hispanic1 -0.162 0.115
## other1 -0.112 0.076 0.107
## lths -0.317 -0.047 -0.190 -0.035
## coll -0.700 0.028 0.075 -0.002 0.367
## agez -0.047 0.055 0.186 0.102 -0.033 0.039
## badhealth -0.226 -0.039 -0.035 -0.021 -0.066 0.077 -0.121
GLMMs can also be used on ordinal outcomes, for example an ordinal logit:
#Ordinal Logit GLMM
merged$hlth<-recode(merged$genhlth, recodes="1=1; 2=2; 3=3; 4=4;5=5; else=NA", as.factor.result=T)
table(merged$hlth)
##
## 1 2 3 4 5
## 19558 33596 30123 12535 4907
fit.mix.clmm<-clmm(hlth~agez+lths+coll+black+hispanic+other+theil.z+povz+(1|cofips), data=merged)
summary(fit.mix.clmm)
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: hlth ~ agez + lths + coll + black + hispanic + other + theil.z +
## povz + (1 | cofips)
## data: merged
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 99181 -138098.49 276222.97 2528(5059) 1.10e-01 3.2e+02
##
## Random effects:
## Groups Name Variance Std.Dev.
## cofips (Intercept) 0.02 0.142
## Number of groups: cofips 91
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## agez 0.33199 0.00607 54.66 < 2e-16 ***
## lths 0.71778 0.02449 29.31 < 2e-16 ***
## coll -0.64604 0.01363 -47.38 < 2e-16 ***
## black1 0.61881 0.02600 23.80 < 2e-16 ***
## hispanic1 0.62383 0.02329 26.79 < 2e-16 ***
## other1 0.42949 0.02726 15.76 < 2e-16 ***
## theil.z 0.01256 0.01717 0.73 0.46
## povz 0.12600 0.02807 4.49 7.2e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 -1.8492 0.0242 -76.4
## 2|3 -0.1993 0.0234 -8.5
## 3|4 1.3953 0.0240 58.1
## 4|5 2.8888 0.0271 106.7
## (1905 observations deleted due to missingness)
how do I get ICC from one of these? For the binomial model, the residual variance is a constant of \(\pi^{2}/3\) For E.g. from the first model:
.142/(.142+ (pi^2)/3)
## [1] 0.04138