DEM 7283 - Multi-level Models Assignment, Part 2

All the variables in this dataset were compiled the Behavioral Risk Factor Surveillance System (BRFFS).

For this assignment, we include as a binary outcome whether the person exercised or not in the last 30 days. As our group level variables we consider the Metropolitan Metro areas as well as the Gini coeffiecients. Our individual variables consist of the level of education and race/ethnicity for all participants.

The number of observations for this analysis is: 69,793

RESEARCH QUESTION Our analysis, attempts to respond the following question: do individuals who have a lower level of education, who are racial/ethnic minorities and live in areas with high poverty rates are less likely to exercise than others?

#load brfss
library(car)
library(stargazer)
library(survey)
library(sjPlot)
library(ggplot2)
library(pander)
library(knitr)
library(lme4)
load("C:/Users/PCMcC/Documents/Statistics II/Data for project/brfss16_mmsa.Rdata")
set.seed(12345)
samps<-sample(1:nrow(brfss16m), size = 90000, replace=F)
brfss16m<-brfss16m[samps,]
#Name modification
nams<-names(brfss16m)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement =  "",x =  nams)
names(brfss16m)<-tolower(newnames)
nams<-names(brfss16m)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement =  "",x =  nams)
names(brfss16m)<-tolower(newnames)

Binary Variable for Analysis

#Exercise
#During the past month, other than your regular job, did you participate in any physical activities or exercises such as running, calisthenics, golf, gardening, or walking for exercise? yes=1, no=0, don't now=7, refused=9, not asked=blank
brfss16m$exercise<-recode(brfss16m$exerany2, recodes="1=1; 2=0; else=NA")
table(brfss16m$exercise)

## 
##     0     1 
## 21592 68259

#sex
brfss16m$female<-ifelse(brfss16m$sex==2, 2, 0)

#Healthy days
brfss16m$healthdays<-recode(brfss16m$physhlth, recodes = "88=0; 77=NA; 99=NA")

#Healthy mental health days
brfss16m$healthmdays<-recode(brfss16m$menthlth, recodes = "88=0; 77=NA; 99=NA")

#Education
brfss16m$education<-recode(brfss16m$educa, recodes="1='no school'; 2='Grade 1-8'; 3='Grade 9-11';4='high school'; 5='some college'; 6='college or more'; 9='NA'",as.factor.result = T)
brfss16m$race_eth<-relevel(brfss16m$education, ref = "college or more")

brfss16m$badhealth<-recode(brfss16m$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss16m$black<-recode(brfss16m$racegr3, recodes="2=1; 9=NA; else=0")
brfss16m$white<-recode(brfss16m$racegr3, recodes="1=1; 9=NA; else=0")
brfss16m$other<-recode(brfss16m$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss16m$hispanic<-recode(brfss16m$racegr3, recodes="5=1; 9=NA; else=0")

brfss16m$race_eth<-recode(brfss16m$racegr3, 
                          recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
                          as.factor.result = T)
brfss16m$race_eth<-relevel(brfss16m$race_eth, ref = "nhwhite")

#insurance
brfss16m$ins<-recode(brfss16m$hlthpln1, recodes ="7:9=NA; 1=1;2=0")

#education level
brfss16m$educ<-recode(brfss16m$educa,
                      recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
                      as.factor.result=T)
brfss16m$educ<-relevel(brfss16m$educ, ref='2hsgrad')

#employment
brfss16m$employ<-recode(brfss16m$employ1,
                        recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
                        as.factor.result=T)
brfss16m$employ<-relevel(brfss16m$employ, ref='employloyed')

#marital status
brfss16m$marst<-recode(brfss16m$marital,
                       recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
                       as.factor.result=T)
brfss16m$marst<-relevel(brfss16m$marst, ref='married')

#Drinking days
brfss16m$daysdrink<-recode(brfss16m$avedrnk2, recodes = "77=NA; 99=NA")
summary(brfss16m$daysdrink)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    1.00    1.00    2.00    2.16    2.00   76.00   44337

I want to see how many people we have in each MSA in the data:

#Now we will begin fitting the multilevel regression model with the msa
#that the person lives in being the higher level
head(data.frame(name=table(brfss16m$mmsaname),id=unique(brfss16m$mmsa)))

##                                                          name.Var1
## 1       Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2                   Albuquerque, NM, Metropolitan Statistical Area
## 3 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4                     Anchorage, AK, Metropolitan Statistical Area
## 5 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6    Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
##   name.Freq    id
## 1       997 39900
## 2       463 45060
## 3       254 41060
## 4       380 45220
## 5       871 33340
## 6       306 16980

#people within each msa

#How many total MSAs are in the data?
length(table(brfss16m$mmsa))

## [1] 143

#counties

Higher level predictors

We will often be interested in factors at both the individual AND contextual levels. To illustrate this, we will use data from the American Community Survey measured at the MSA level. Specifically, we use the DP3 table, which provides economic characteristics of places, from the 2010 5 year ACS Link.

library(acs)
#Get 2010 ACS median household incomes for tracts in Texas
msaacs<-geo.make(msa="*")

#ACS table B03002 is Hispanic ethnicity by race 
acsecon<-acs.fetch(key=pckey, endyear=2010, span=5, geography=msaacs, variable = c("B19083_001","B17001_001","B17001_002" ))

colnames(acsecon@estimate)

## [1] "B19083_001" "B17001_001" "B17001_002"

msaecon<-data.frame(gini=acsecon@estimate[, "B19083_001"], 
ppoverty=acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"],
giniz=scale(acsecon@estimate[, "B19083_001"]), 
ppovertyz=scale(acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"]))

msaecon$ids<-paste(acsecon@geography$metropolitanstatisticalareamicropolitanstatisticalarea)

Merge the MSA data to the BRFSS data

mixdata<-merge(brfss16m, msaecon, by.x="mmsa",by.y="ids", all.x=T)
mixdata<-mixdata[complete.cases(mixdata[, c("race_eth", "exercise", "educ", "gini")]),]

head(mixdata[, c("race_eth", "exercise", "educ", "gini")])

##   race_eth exercise     educ  gini
## 1  nhwhite        1 4colgrad 0.427
## 2  nhwhite        1  2hsgrad 0.427
## 3 nh other        1  2hsgrad 0.427
## 4  nhwhite        1 4colgrad 0.427
## 5  nhwhite        1 3somecol 0.427
## 6  nhwhite        1 4colgrad 0.427

Cross level interaction effects

The MSA Level Variables show that there is a negative relationship between exercises and those with a some highshool or lower as well as black individuals and those who live in areas with a lower level of wealth.

#MSA level variables
fit.mix1<-lmer(exercise~+educ+race_eth+giniz+(1|mmsa), data=mixdata)
arm::display(fit.mix1, detail=T)

## lmer(formula = exercise ~ +educ + race_eth + giniz + (1 | mmsa), 
##     data = mixdata)
##                      coef.est coef.se t value
## (Intercept)            0.66     0.00  138.63 
## educ0Prim             -0.13     0.01  -11.40 
## educ1somehs           -0.10     0.01  -12.66 
## educ3somecol           0.09     0.00   21.89 
## educ4colgrad           0.20     0.00   49.87 
## race_ethhispanic       0.00     0.01    0.53 
## race_ethnh black      -0.04     0.01   -6.71 
## race_ethnh multirace   0.00     0.01    0.38 
## race_ethnh other      -0.02     0.01   -2.34 
## giniz                 -0.03     0.01   -5.10 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.04    
##  Residual             0.42    
## ---
## number of obs: 69793, groups: mmsa, 123
## AIC = 75656.1, DIC = 75466.2
## deviance = 75549.1

The cross level interaction tells a different story; the ability of non-hispanic blacks with a low level of education (primary or some high school) to exercise is highly influenced by the area’s income or wealth.

#Cross-level interaction model:
fit.mix2<-lmer(exercise~educ+race_eth*educ+giniz+(1|mmsa), mixdata, REML=F)
arm::display(fit.mix2, detail=T)

## lmer(formula = exercise ~ educ + race_eth * educ + giniz + (1 | 
##     mmsa), data = mixdata, REML = F)
##                                   coef.est coef.se t value
## (Intercept)                         0.66     0.00  133.94 
## educ0Prim                          -0.16     0.02   -8.72 
## educ1somehs                        -0.12     0.01  -11.43 
## educ3somecol                        0.10     0.00   19.60 
## educ4colgrad                        0.21     0.00   45.61 
## race_ethhispanic                    0.02     0.01    1.76 
## race_ethnh black                   -0.05     0.01   -4.91 
## race_ethnh multirace                0.05     0.03    1.77 
## race_ethnh other                    0.01     0.02    0.32 
## giniz                              -0.03     0.01   -5.13 
## educ0Prim:race_ethhispanic          0.03     0.03    1.18 
## educ1somehs:race_ethhispanic        0.04     0.02    1.71 
## educ3somecol:race_ethhispanic      -0.03     0.02   -1.72 
## educ4colgrad:race_ethhispanic      -0.05     0.02   -3.26 
## educ0Prim:race_ethnh black          0.14     0.04    3.21 
## educ1somehs:race_ethnh black        0.07     0.02    3.09 
## educ3somecol:race_ethnh black       0.01     0.01    0.43 
## educ4colgrad:race_ethnh black       0.01     0.01    0.48 
## educ0Prim:race_ethnh multirace     -0.21     0.10   -2.19 
## educ1somehs:race_ethnh multirace   -0.08     0.06   -1.32 
## educ3somecol:race_ethnh multirace  -0.03     0.03   -0.92 
## educ4colgrad:race_ethnh multirace  -0.06     0.03   -1.84 
## educ0Prim:race_ethnh other          0.01     0.06    0.12 
## educ1somehs:race_ethnh other       -0.09     0.05   -1.96 
## educ3somecol:race_ethnh other       0.01     0.03    0.38 
## educ4colgrad:race_ethnh other      -0.05     0.02   -2.32 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.04    
##  Residual             0.42    
## ---
## number of obs: 69793, groups: mmsa, 123
## AIC = 75543.3, DIC = 75487.3
## deviance = 75487.3

A comparison of the two models shows an interaction.

#compare the models with a LRT. There is evidence of interaction.
anova(fit.mix1, fit.mix2)

## refitting model(s) with ML (instead of REML)

## Data: mixdata
## Models:
## fit.mix1: exercise ~ +educ + race_eth + giniz + (1 | mmsa)
## fit.mix2: exercise ~ educ + race_eth * educ + giniz + (1 | mmsa)
##          Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## fit.mix1 12 75573 75683 -37775    75549                             
## fit.mix2 28 75543 75800 -37744    75487 61.778     16  2.619e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Multilevel Model

anova(glm(exercise~ educ+race_eth+giniz+(1|mmsa), mixdata, family=binomial), test="Chisq")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: exercise
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                     69792      77684              
## educ      4   3927.5     69788      73756 < 2.2e-16 ***
## race_eth  4    104.0     69784      73652 < 2.2e-16 ***
## giniz     1    196.2     69783      73456 < 2.2e-16 ***
## 1 | mmsa  0      0.0     69783      73456              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Multilevel Logistic Data with Group Level Variables

The following results confirm previous findings; higher levels of education are related with a higher reports of exercising. In fact, individuals with a college education are 3 times more likely to have exercised in the last 30 days when compared to those with just a high school education. Black individuals OR those who live in areas with lower levels of wealth are also less likely to have exercised in the past 30 days.

fit.mix.bin<-glmer(I(exercise>0)~educ+race_eth+giniz+(1|mmsa), family=binomial, mixdata,
                   control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))
arm::display(fit.mix.bin, detail=T)

## glmer(formula = I(exercise > 0) ~ educ + race_eth + giniz + (1 | 
##     mmsa), data = mixdata, family = binomial, control = glmerControl(optimizer = "bobyqa", 
##     optCtrl = list(maxfun = 2e+05)))
##                      coef.est coef.se z value Pr(>|z|)
## (Intercept)            0.69     0.03   26.39    0.00  
## educ0Prim             -0.52     0.06   -9.39    0.00  
## educ1somehs           -0.42     0.04  -10.63    0.00  
## educ3somecol           0.46     0.02   19.85    0.00  
## educ4colgrad           1.17     0.02   48.99    0.00  
## race_ethhispanic       0.01     0.04    0.14    0.89  
## race_ethnh black      -0.20     0.03   -6.37    0.00  
## race_ethnh multirace   0.02     0.07    0.31    0.76  
## race_ethnh other      -0.13     0.05   -2.44    0.01  
## giniz                 -0.15     0.03   -4.99    0.00  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.21    
##  Residual             1.00    
## ---
## number of obs: 69793, groups: mmsa, 123
## AIC = 73152.6, DIC = 72573.9
## deviance = 72852.2

#odds ratios
exp(fixef(fit.mix.bin)[-1])

##            educ0Prim          educ1somehs         educ3somecol 
##            0.5943653            0.6575536            1.5769399 
##         educ4colgrad     race_ethhispanic     race_ethnh black 
##            3.2173542            1.0051307            0.8169054 
## race_ethnh multirace     race_ethnh other                giniz 
##            1.0232416            0.8802847            0.8607573

Multilevel Logistic Data WITHOUT Group Level Variable (Gini)

An multilevel logistic analysis that excludes the GINI coeffecient shows a similar trend. Removing the GINI variable does not seem to have a strong effect in the odd ratios for any group. Although there is a slight shift observed amongst the black group, we can conclude that the level of education is a much more important factor that influences the person’s ability to exercise than it is the level of wealth of the area in which they live.

fit.mix.bin2<-glmer(I(exercise>0)~educ+race_eth+(1|mmsa), family=binomial, mixdata,
                   control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))
arm::display(fit.mix.bin, detail=T)

## glmer(formula = I(exercise > 0) ~ educ + race_eth + giniz + (1 | 
##     mmsa), data = mixdata, family = binomial, control = glmerControl(optimizer = "bobyqa", 
##     optCtrl = list(maxfun = 2e+05)))
##                      coef.est coef.se z value Pr(>|z|)
## (Intercept)            0.69     0.03   26.39    0.00  
## educ0Prim             -0.52     0.06   -9.39    0.00  
## educ1somehs           -0.42     0.04  -10.63    0.00  
## educ3somecol           0.46     0.02   19.85    0.00  
## educ4colgrad           1.17     0.02   48.99    0.00  
## race_ethhispanic       0.01     0.04    0.14    0.89  
## race_ethnh black      -0.20     0.03   -6.37    0.00  
## race_ethnh multirace   0.02     0.07    0.31    0.76  
## race_ethnh other      -0.13     0.05   -2.44    0.01  
## giniz                 -0.15     0.03   -4.99    0.00  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.21    
##  Residual             1.00    
## ---
## number of obs: 69793, groups: mmsa, 123
## AIC = 73152.6, DIC = 72573.9
## deviance = 72852.2

#odds ratios
exp(fixef(fit.mix.bin2)[-1])

##            educ0Prim          educ1somehs         educ3somecol 
##            0.5947029            0.6570282            1.5749383 
##         educ4colgrad     race_ethhispanic     race_ethnh black 
##            3.2081725            0.9914936            0.8096774 
## race_ethnh multirace     race_ethnh other 
##            1.0223257            0.8782305

#LRT between models
anova(fit.mix.bin, fit.mix.bin2)

## Data: mixdata
## Models:
## fit.mix.bin2: I(exercise > 0) ~ educ + race_eth + (1 | mmsa)
## fit.mix.bin: I(exercise > 0) ~ educ + race_eth + giniz + (1 | mmsa)
##              Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## fit.mix.bin2 10 73173 73264 -36576    73153                             
## fit.mix.bin  11 73153 73253 -36565    73131 22.343      1  2.281e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

CONCLUSION

Going back to our research question, do individuals who have a lower level of education, who are racial/ethnic minorities and live in areas with high poverty rates are less likely to exercise than others?.. The answer is no.

Although racial minorities, primarily black show a disadvantage when compared to other groups, the gap is certainly larger by levels of education than by race_ethnicity. Furthermore, including the GINI variable shows little impact. We conclude that there is a small interaction between the area’s level of wealth and the race/ethnicity of the individual in their likelihood to exercise. The level of education is by far, the strongest determinant in a person likelihood to exercise.