My outcome of interest is a person’s average alcoholic drinks per day in past 30 days, measured by asking, ‘during the past 30 days, on the days when you drank, about how many drinks did you drink on the average? (A 40 ounce beer would count as 3 drinks, or a cocktail drink with 2 shots would count as 2 drinks)’. For this homework, I am using 2012 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART county data and merging it to the 2010 American Community Survey 5-year estimates at the county level to get the higher level predictors.

Loading Libraries

library(lme4)
library(arm)
library(car)
library(haven)
library(tidycensus)
library(dplyr)
library(acs)
library(censusapi)
library(tidyverse)
library(sf)

Loading BRFSS Data

brfss_12<-read_xpt("C://users/munta/Google Drive/dem7473/data/CNTY12.xpt")
nams<-names(brfss_12)
newnames<-gsub("_", "", nams)
names(brfss_12)<-make.names(tolower(newnames), unique = T)

Recoding Variables

brfss_12$statefip<-sprintf("%02d", brfss_12$state )
brfss_12$cofip<-sprintf("%03d", brfss_12$cnty )
brfss_12$cofips<-paste(brfss_12$statefip, brfss_12$cofip, sep="")

#Avg alcoholic drinks per day in past 30
#One drink is equivalent to a 12-ounce beer, a 5-ounce glass of wine, or a drink with one shot of liquor. During the past 30 days, on the days when you drank, about how many drinks did you drink on the average?
brfss_12$avgdrnk<-Recode(brfss_12$avedrnk2, recodes="77=NA; 99=NA", as.factor=T)

#insurance
brfss_12$ins<-ifelse(brfss_12$hlthpln1==1,1,0)
#smoking currently
brfss_12$smoke<-Recode(brfss_12$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#bmi
brfss_12$bmi<-ifelse(is.na(brfss_12$bmi5)==T, NA, brfss_12$bmi5/100)
#poor or fair health
brfss_12$badhealth<-ifelse(brfss_12$genhlth %in% c(4,5),1,0)

#race
brfss_12$black<-Recode(brfss_12$racegr2, recodes="2=1; 9=NA; else=0", as.factor=T)
brfss_12$white<-Recode(brfss_12$racegr2, recodes="1=1; 9=NA; else=0", as.factor=T)
brfss_12$other<-Recode(brfss_12$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor=T)
brfss_12$hispanic<-Recode(brfss_12$racegr2, recodes="5=1; 9=NA; else=0", as.factor=T)

#have a personal doctor?
brfss_12$doc<-Recode(brfss_12$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor=F)

#needed care in last year but couldn't get it because of cost
brfss_12$medacc<-Recode(brfss_12$medcost, recodes="1=1;2=0;else=NA")

#education level
brfss_12$lths<-Recode(brfss_12$educa, recodes="1:3=1;9=NA; else=0", as.factor=F)
brfss_12$coll<-Recode(brfss_12$educa, recodes="5:6=1;9=NA; else=0", as.factor=F)

#employment
brfss_12$employ<-Recode(brfss_12$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
#brfss_12$employ<-relevel(brfss_12$employ, ref='Employed')

#marital status
brfss_12$marst<-Recode(brfss_12$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_12$marst<-relevel(brfss_12$marst, ref='married')

#income
brfss_12$inc<-as.factor(ifelse(brfss_12$incomg==9, NA, brfss_12$incomg))

#Age cut into intervals
brfss_12$agec<-cut(brfss_12$age, breaks=c(0,24,39,59,79,99))

brfss_12$agez<-as.numeric(scale(brfss_12$age, scale =T,center=T))

brfss_12$bmiz<-as.numeric(scale(brfss_12$bmi, scale = T , center=T))

brfss_12$male<-ifelse(brfss_12$sex==1, 1, 0)
brfss_12<-brfss_12%>%
  #select(!mrace)%>%
  select(cofips,male, agec, agez, bmi, bmiz, black, hispanic, other,  marst, employ, lths, coll, badhealth, avgdrnk)%>%
  filter(complete.cases(.))

Higher level (Macro) variables

To measure macro level variables, I include some Census variables from the ACS 2011 5 Year estimates load in ACS data from tidycensus. The first set of variables includes information on the economic conditions of the county, specifically poverty and unemployment.

library(tidycensus)
usacs<-get_acs(geography = "county", year = 2011,
                variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
                             "DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
                             "DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
                             "DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
                             "DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
                             "DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE", "DP02_0113PE",  "DP04_0003PE") ,
                             summary_var = "B01001_001",
                             geometry = F, output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
## Using the ACS Data Profile
usacs<-usacs%>%
  mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100, nwhite=DP05_0032E,nblack=DP05_0033E,
         pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100,
         phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,
         ppov=DP03_0119PE/100, pforn=DP02_0092PE/100,plep=DP02_0113PE/100,  pvacant=DP04_0003PE/100)%>%
  select(GEOID,NAME, totpop, pwhite, pblack, phisp, pfemhh, phsormore, punemp, medhhinc, ppov, pforn,plep, pvacant)
load(url("https://github.com/coreysparks/data/blob/master/segregation_county.Rdata?raw=true"))

#Merge files by county FIPS codes
joindata<-merge(usacs, county_dat, by.x="GEOID", by.y="cofips", all.x=T)

Clean and standardize contextual data

myscale<-function(x){as.numeric(scale(x))}
joindata<-joindata %>%
  mutate_at(c("ppov","punemp", "pblack","phisp", "pfemhh", "phsormore", "medhhinc", "pvacant", "dissim_wb", "isolation_b"),myscale)

joindata<-joindata%>%filter(complete.cases(.))

Now, I merge the data back to the individual level data:

merged<-merge(x=brfss_12, y=usacs, by.x="cofips", by.y="GEOID")
merged<-merged%>%
  filter(complete.cases(.))

Models

Random intercept model.

Model 1 - individual level effects

fit.mix<-lmer(as.numeric(avgdrnk)~male+agez+lths+coll+black+hispanic+other+(1|cofips), data=merged)

library(MuMIn)
r.squaredGLMM(fit.mix)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help
## page.
##            R2m      R2c
## [1,] 0.1023364 0.107177
#cor(fitted(fit.mix),fit.mix@frame$as.numeric(avgdrnk)^2)

#Tests of coefficitns
display(fit.mix, detail=T, digits=4)
## lmer(formula = as.numeric(avgdrnk) ~ male + agez + lths + coll + 
##     black + hispanic + other + (1 | cofips), data = merged)
##             coef.est coef.se  t value 
## (Intercept)   2.0938   0.0172 121.9522
## male          0.7090   0.0113  63.0180
## agez         -0.4408   0.0062 -71.4467
## lths          0.4592   0.0316  14.5190
## coll         -0.4941   0.0141 -35.0774
## black1       -0.1084   0.0232  -4.6736
## hispanic1     0.2706   0.0248  10.9019
## other1        0.1621   0.0248   6.5312
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.1376  
##  Residual             1.8692  
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 456904, DIC = 456781.7
## deviance = 456833.1
#confidence intervals for the Odds ratio
exp(confint(fit.mix, method="Wald"))
##                 2.5 %    97.5 %
## .sig01             NA        NA
## .sigma             NA        NA
## (Intercept) 7.8472700 8.3935836
## male        1.9876624 2.0772857
## agez        0.6357819 0.6513457
## lths        1.4876587 1.6840121
## coll        0.5935317 0.6272230
## black1      0.8574139 0.9390086
## hispanic1   1.2484952 1.3760699
## other1      1.1201151 1.2345438

The r-squared value explains around 11 percent variation, which is acceptable. We can see that, males show a positive association with average alcoholic drinks per day in past 30 days. With age, the consumption of average drinks decreases. With increasing education, average alcoholic drinks per day in past 30 days decrease as well.

Model 2 - Here is a model that only considers the macro level variable on the outcome:

fit.mix.mac<-lmer(as.numeric(avgdrnk)~ppov+pblack+phsormore+(1|cofips),data=merged)


r.squaredGLMM(fit.mix.mac)
##             R2m        R2c
## [1,] 0.00224927 0.01046793
display(fit.mix.mac, detail=T, digits=4)
## lmer(formula = as.numeric(avgdrnk) ~ ppov + pblack + phsormore + 
##     (1 | cofips), data = merged)
##             coef.est coef.se t value
## (Intercept)  2.2584   0.4424  5.1049
## ppov         2.3662   0.5889  4.0180
## pblack      -0.1802   0.1248 -1.4441
## phsormore   -0.3629   0.4542 -0.7991
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.1795  
##  Residual             1.9699  
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 468637, DIC = 468606.7
## deviance = 468615.7
#confidence intervals for the Odds ratio
exp(confint(fit.mix.mac, method="Wald"))
##                 2.5 %    97.5 %
## .sig01             NA        NA
## .sigma             NA        NA
## (Intercept) 4.0202361 22.772518
## ppov        3.3601747 33.799470
## pblack      0.6539597  1.066474
## phsormore   0.2856069  1.694296

So, in counties that have higher poverty rates, the chance of average alcoholic drinks per day in past 30 days is higher.

Model 3 - multi-level model to include individual effects and the poverty rate at the county level:

fit.mix.ml<-lmer(as.numeric(avgdrnk)~male+agez+lths+coll+black+hispanic+other+ppov+pblack+phsormore+(1|cofips), data=merged)

r.squaredGLMM(fit.mix.ml)
##           R2m       R2c
## [1,] 0.103551 0.1077953
display(fit.mix.ml, detail=T, digits=4)
## lmer(formula = as.numeric(avgdrnk) ~ male + agez + lths + coll + 
##     black + hispanic + other + ppov + pblack + phsormore + (1 | 
##     cofips), data = merged)
##             coef.est coef.se  t value 
## (Intercept)   1.6980   0.3440   4.9363
## male          0.7090   0.0113  63.0230
## agez         -0.4408   0.0062 -71.4673
## lths          0.4568   0.0316  14.4439
## coll         -0.4941   0.0141 -35.0850
## black1       -0.1183   0.0236  -5.0097
## hispanic1     0.2608   0.0250  10.4463
## other1        0.1614   0.0248   6.5117
## ppov          1.6985   0.4566   3.7200
## pblack       -0.0082   0.0988  -0.0828
## phsormore     0.2757   0.3533   0.7802
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.1289  
##  Residual             1.8692  
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 456888, DIC = 456751.4
## deviance = 456806.6
#confidence intervals for the Odds ratio
exp(confint(fit.mix.ml, method="Wald"))
##                 2.5 %     97.5 %
## .sig01             NA         NA
## .sigma             NA         NA
## (Intercept) 2.7837823 10.7210917
## male        1.9877000  2.0773201
## agez        0.6357660  0.6513259
## lths        1.4841560  1.6800554
## coll        0.5934802  0.6271672
## black1      0.8482375  0.9305104
## hispanic1   1.2360195  1.3631157
## other1      1.1194597  1.2337146
## ppov        2.2335883 13.3748353
## pblack      0.8172777  1.2037310
## phsormore   0.6591199  2.6331900

The individual effects remains the same.

These results show, that net of individual level variables, the county level predictors do not tell us much about average alcoholic drinks per day in past 30 days.

Comparison of Models

#Model 1 to Model 3
anova(fit.mix, fit.mix.ml)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit.mix: as.numeric(avgdrnk) ~ male + agez + lths + coll + black + hispanic + 
## fit.mix:     other + (1 | cofips)
## fit.mix.ml: as.numeric(avgdrnk) ~ male + agez + lths + coll + black + hispanic + 
## fit.mix.ml:     other + ppov + pblack + phsormore + (1 | cofips)
##            Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## fit.mix    10 456853 456949 -228417   456833                             
## fit.mix.ml 13 456833 456958 -228403   456807 26.435      3  7.731e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The second model with county level predictors seem to be significant. However, there is not much justification in including the higher level predictors.

Hypothesis

A hypothesis from the findings above:

H1: Racial/ethnic minorities living in High poverty areas face added risk of having more than average alcoholic drinks per day in past 30 days.

Model 4

Cross level interaction model between poverty and racial, using the individual level race variables as my micro level variable of interest.

fit.mix.cl<-lmer(as.numeric(avgdrnk)~male+agez+lths+coll+(black+hispanic+other)*ppov+ (1|cofips), data=merged)
display(fit.mix.cl, detail=T)
## lmer(formula = as.numeric(avgdrnk) ~ male + agez + lths + coll + 
##     (black + hispanic + other) * ppov + (1 | cofips), data = merged)
##                coef.est coef.se t value
## (Intercept)      1.99     0.03   63.47 
## male             0.71     0.01   63.07 
## agez            -0.44     0.01  -71.52 
## lths             0.45     0.03   14.37 
## coll            -0.49     0.01  -35.04 
## black1          -0.26     0.06   -4.06 
## hispanic1        0.13     0.06    2.14 
## other1           0.08     0.06    1.28 
## ppov             1.07     0.30    3.62 
## black1:ppov      1.34     0.55    2.44 
## hispanic1:ppov   1.23     0.55    2.25 
## other1:ppov      0.88     0.66    1.33 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept) 0.13    
##  Residual             1.87    
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 456874, DIC = 456746.5
## deviance = 456796.5
exp(confint(fit.mix.cl, method="Wald"))
##                    2.5 %     97.5 %
## .sig01                NA         NA
## .sigma                NA         NA
## (Intercept)    6.9103125  7.8162075
## male           1.9888634  2.0785449
## agez           0.6355690  0.6511239
## lths           1.4806222  1.6760992
## coll           0.5939410  0.6276481
## black1         0.6765550  0.8726949
## hispanic1      1.0111039  1.2908974
## other1         0.9571926  1.2312190
## ppov           1.6340323  5.2282596
## black1:ppov    1.3043798 11.1872326
## hispanic1:ppov 1.1749199 10.0546896
## other1:ppov    0.6613498  8.8286392

It shows that blacks and hispanics are in advantageous position in terms of average alcoholic drinks per day in past 30 days in counties that have higher poverty rates. There is evidence that the interactions are significant, however, the confidence intervals are noticeably wide.

Comparison of Models

#Model 3 to Model 4
anova(fit.mix.ml, fit.mix.cl)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit.mix.ml: as.numeric(avgdrnk) ~ male + agez + lths + coll + black + hispanic + 
## fit.mix.ml:     other + ppov + pblack + phsormore + (1 | cofips)
## fit.mix.cl: as.numeric(avgdrnk) ~ male + agez + lths + coll + (black + hispanic + 
## fit.mix.cl:     other) * ppov + (1 | cofips)
##            Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)   
## fit.mix.ml 13 456833 456958 -228403   456807                            
## fit.mix.cl 14 456824 456959 -228398   456796 10.179      1   0.001421 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The cross-level interaction model is seen to be significant.

Fitted Values

Using the highest, an average and the lowest intercept counties to illustrate the differences.

which.max(ranef(fit.mix.cl)$cofips$`(Intercept)`)
## [1] 39
which(ranef(fit.mix.cl)$cofips$`(Intercept)`> -.01 &ranef(fit.mix.cl)$cofips$`(Intercept)` <.01)
##  [1]   4  50  66  67  82 100 109 125 139 148 174 176 186
which.min(ranef(fit.mix.cl)$cofips$`(Intercept)`)
## [1] 205
rownames(ranef(fit.mix.cl)$cofips)[c(39, 4, 205)]
## [1] "15003" "02020" "53077"
merged$NAME[15003]
## [1] "New Haven County, Connecticut"
merged$NAME[02020]
## [1] "Maricopa County, Arizona"
merged$NAME[53077]
## [1] "Hennepin County, Minnesota"

Getting fitted values out of cross-level interaction model

#Get my different types of people from the fixed effects
newdat1<-expand.grid(male = c(0,1),agez=0, lths=0, coll=0, 
                     black=levels(merged$black),
                     hispanic=levels(merged$hispanic),
                     other=levels(merged$other))
#remove some nonsensicals, people who are multiple ethnicities, etc.
newdat1<-newdat1[c( -7:-8, -15:-16),]

#I need 3 counties worth of people
newdat1<-rbind(newdat1, newdat1, newdat1)
#Put in the county identifier
newdat1$cofips<-c(rep("15003", 12), rep("02020", 12), rep("53077", 12))
#get the poverty z score for these 3 counties
povz <- joindata[joindata$GEOID %in%c("15003", "020202", "53077") , "ppov"]

newdat1$ppov<-c(rep(povz[1], 12), rep(povz[2], 12), rep(povz[3], 12))

#Here's our three counties of example cases:
newdat1
##     male agez lths coll black hispanic other cofips       ppov
## 1      0    0    0    0     0        0     0  15003 -0.7673104
## 2      1    0    0    0     0        0     0  15003 -0.7673104
## 3      0    0    0    0     1        0     0  15003 -0.7673104
## 4      1    0    0    0     1        0     0  15003 -0.7673104
## 5      0    0    0    0     0        1     0  15003 -0.7673104
## 6      1    0    0    0     0        1     0  15003 -0.7673104
## 9      0    0    0    0     0        0     1  15003 -0.7673104
## 10     1    0    0    0     0        0     1  15003 -0.7673104
## 11     0    0    0    0     1        0     1  15003 -0.7673104
## 12     1    0    0    0     1        0     1  15003 -0.7673104
## 13     0    0    0    0     0        1     1  15003 -0.7673104
## 14     1    0    0    0     0        1     1  15003 -0.7673104
## 15     0    0    0    0     0        0     0  02020  0.5020819
## 21     1    0    0    0     0        0     0  02020  0.5020819
## 31     0    0    0    0     1        0     0  02020  0.5020819
## 41     1    0    0    0     1        0     0  02020  0.5020819
## 51     0    0    0    0     0        1     0  02020  0.5020819
## 61     1    0    0    0     0        1     0  02020  0.5020819
## 91     0    0    0    0     0        0     1  02020  0.5020819
## 101    1    0    0    0     0        0     1  02020  0.5020819
## 111    0    0    0    0     1        0     1  02020  0.5020819
## 121    1    0    0    0     1        0     1  02020  0.5020819
## 131    0    0    0    0     0        1     1  02020  0.5020819
## 141    1    0    0    0     0        1     1  02020  0.5020819
## 16     0    0    0    0     0        0     0  53077         NA
## 22     1    0    0    0     0        0     0  53077         NA
## 32     0    0    0    0     1        0     0  53077         NA
## 42     1    0    0    0     1        0     0  53077         NA
## 52     0    0    0    0     0        1     0  53077         NA
## 62     1    0    0    0     0        1     0  53077         NA
## 92     0    0    0    0     0        0     1  53077         NA
## 102    1    0    0    0     0        0     1  53077         NA
## 112    0    0    0    0     1        0     1  53077         NA
## 122    1    0    0    0     1        0     1  53077         NA
## 132    0    0    0    0     0        1     1  53077         NA
## 142    1    0    0    0     0        1     1  53077         NA
newdat1$fitted<-predict(fit.mix.cl,newdata=newdat1, type="response")

newdat1
##     male agez lths coll black hispanic other cofips       ppov     fitted
## 1      0    0    0    0     0        0     0  15003 -0.7673104  1.6852040
## 2      1    0    0    0     0        0     0  15003 -0.7673104  2.3948197
## 3      0    0    0    0     1        0     0  15003 -0.7673104  0.3933605
## 4      1    0    0    0     1        0     0  15003 -0.7673104  1.1029762
## 5      0    0    0    0     0        1     0  15003 -0.7673104  0.8710577
## 6      1    0    0    0     0        1     0  15003 -0.7673104  1.5806734
## 9      0    0    0    0     0        0     1  15003 -0.7673104  1.0903605
## 10     1    0    0    0     0        0     1  15003 -0.7673104  1.7999762
## 11     0    0    0    0     1        0     1  15003 -0.7673104 -0.2014830
## 12     1    0    0    0     1        0     1  15003 -0.7673104  0.5081327
## 13     0    0    0    0     0        1     1  15003 -0.7673104  0.2762142
## 14     1    0    0    0     0        1     1  15003 -0.7673104  0.9858299
## 15     0    0    0    0     0        0     0  02020  0.5020819  2.5362932
## 21     1    0    0    0     0        0     0  02020  0.5020819  3.2459088
## 31     0    0    0    0     1        0     0  02020  0.5020819  2.9457533
## 41     1    0    0    0     1        0     0  02020  0.5020819  3.6553689
## 51     0    0    0    0     0        1     0  02020  0.5020819  3.2893635
## 61     1    0    0    0     0        1     0  02020  0.5020819  3.9989791
## 91     0    0    0    0     0        0     1  02020  0.5020819  3.0613891
## 101    1    0    0    0     0        0     1  02020  0.5020819  3.7710048
## 111    0    0    0    0     1        0     1  02020  0.5020819  3.4708492
## 121    1    0    0    0     1        0     1  02020  0.5020819  4.1804649
## 131    0    0    0    0     0        1     1  02020  0.5020819  3.8144594
## 141    1    0    0    0     0        1     1  02020  0.5020819  4.5240751
## 16     0    0    0    0     0        0     0  53077         NA         NA
## 22     1    0    0    0     0        0     0  53077         NA         NA
## 32     0    0    0    0     1        0     0  53077         NA         NA
## 42     1    0    0    0     1        0     0  53077         NA         NA
## 52     0    0    0    0     0        1     0  53077         NA         NA
## 62     1    0    0    0     0        1     0  53077         NA         NA
## 92     0    0    0    0     0        0     1  53077         NA         NA
## 102    1    0    0    0     0        0     1  53077         NA         NA
## 112    0    0    0    0     1        0     1  53077         NA         NA
## 122    1    0    0    0     1        0     1  53077         NA         NA
## 132    0    0    0    0     0        1     1  53077         NA         NA
## 142    1    0    0    0     0        1     1  53077         NA         NA