My outcome of interest is a person’s average alcoholic drinks per day in past 30 days, measured by asking, ‘during the past 30 days, on the days when you drank, about how many drinks did you drink on the average? (A 40 ounce beer would count as 3 drinks, or a cocktail drink with 2 shots would count as 2 drinks)’. For this homework, I am using 2012 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART county data and merging it to the 2010 American Community Survey 5-year estimates at the county level to get the higher level predictors.
library(lme4)
library(arm)
library(car)
library(haven)
library(tidycensus)
library(dplyr)
library(acs)
library(censusapi)
library(tidyverse)
library(sf)
brfss_12<-read_xpt("C://users/munta/Google Drive/dem7473/data/CNTY12.xpt")
nams<-names(brfss_12)
newnames<-gsub("_", "", nams)
names(brfss_12)<-make.names(tolower(newnames), unique = T)
brfss_12$statefip<-sprintf("%02d", brfss_12$state )
brfss_12$cofip<-sprintf("%03d", brfss_12$cnty )
brfss_12$cofips<-paste(brfss_12$statefip, brfss_12$cofip, sep="")
#Avg alcoholic drinks per day in past 30
#One drink is equivalent to a 12-ounce beer, a 5-ounce glass of wine, or a drink with one shot of liquor. During the past 30 days, on the days when you drank, about how many drinks did you drink on the average?
brfss_12$avgdrnk<-Recode(brfss_12$avedrnk2, recodes="77=NA; 99=NA", as.factor=T)
#insurance
brfss_12$ins<-ifelse(brfss_12$hlthpln1==1,1,0)
#smoking currently
brfss_12$smoke<-Recode(brfss_12$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#bmi
brfss_12$bmi<-ifelse(is.na(brfss_12$bmi5)==T, NA, brfss_12$bmi5/100)
#poor or fair health
brfss_12$badhealth<-ifelse(brfss_12$genhlth %in% c(4,5),1,0)
#race
brfss_12$black<-Recode(brfss_12$racegr2, recodes="2=1; 9=NA; else=0", as.factor=T)
brfss_12$white<-Recode(brfss_12$racegr2, recodes="1=1; 9=NA; else=0", as.factor=T)
brfss_12$other<-Recode(brfss_12$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor=T)
brfss_12$hispanic<-Recode(brfss_12$racegr2, recodes="5=1; 9=NA; else=0", as.factor=T)
#have a personal doctor?
brfss_12$doc<-Recode(brfss_12$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor=F)
#needed care in last year but couldn't get it because of cost
brfss_12$medacc<-Recode(brfss_12$medcost, recodes="1=1;2=0;else=NA")
#education level
brfss_12$lths<-Recode(brfss_12$educa, recodes="1:3=1;9=NA; else=0", as.factor=F)
brfss_12$coll<-Recode(brfss_12$educa, recodes="5:6=1;9=NA; else=0", as.factor=F)
#employment
brfss_12$employ<-Recode(brfss_12$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
#brfss_12$employ<-relevel(brfss_12$employ, ref='Employed')
#marital status
brfss_12$marst<-Recode(brfss_12$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_12$marst<-relevel(brfss_12$marst, ref='married')
#income
brfss_12$inc<-as.factor(ifelse(brfss_12$incomg==9, NA, brfss_12$incomg))
#Age cut into intervals
brfss_12$agec<-cut(brfss_12$age, breaks=c(0,24,39,59,79,99))
brfss_12$agez<-as.numeric(scale(brfss_12$age, scale =T,center=T))
brfss_12$bmiz<-as.numeric(scale(brfss_12$bmi, scale = T , center=T))
brfss_12$male<-ifelse(brfss_12$sex==1, 1, 0)
brfss_12<-brfss_12%>%
#select(!mrace)%>%
select(cofips,male, agec, agez, bmi, bmiz, black, hispanic, other, marst, employ, lths, coll, badhealth, avgdrnk)%>%
filter(complete.cases(.))
To measure macro level variables, I include some Census variables from the ACS 2011 5 Year estimates load in ACS data from tidycensus. The first set of variables includes information on the economic conditions of the county, specifically poverty and unemployment.
library(tidycensus)
usacs<-get_acs(geography = "county", year = 2011,
variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
"DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
"DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
"DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
"DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
"DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE", "DP02_0113PE", "DP04_0003PE") ,
summary_var = "B01001_001",
geometry = F, output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
## Using the ACS Data Profile
usacs<-usacs%>%
mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100, nwhite=DP05_0032E,nblack=DP05_0033E,
pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100,
phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,
ppov=DP03_0119PE/100, pforn=DP02_0092PE/100,plep=DP02_0113PE/100, pvacant=DP04_0003PE/100)%>%
select(GEOID,NAME, totpop, pwhite, pblack, phisp, pfemhh, phsormore, punemp, medhhinc, ppov, pforn,plep, pvacant)
load(url("https://github.com/coreysparks/data/blob/master/segregation_county.Rdata?raw=true"))
#Merge files by county FIPS codes
joindata<-merge(usacs, county_dat, by.x="GEOID", by.y="cofips", all.x=T)
myscale<-function(x){as.numeric(scale(x))}
joindata<-joindata %>%
mutate_at(c("ppov","punemp", "pblack","phisp", "pfemhh", "phsormore", "medhhinc", "pvacant", "dissim_wb", "isolation_b"),myscale)
joindata<-joindata%>%filter(complete.cases(.))
Now, I merge the data back to the individual level data:
merged<-merge(x=brfss_12, y=usacs, by.x="cofips", by.y="GEOID")
merged<-merged%>%
filter(complete.cases(.))
fit.mix<-lmer(as.numeric(avgdrnk)~male+agez+lths+coll+black+hispanic+other+(1|cofips), data=merged)
library(MuMIn)
r.squaredGLMM(fit.mix)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help
## page.
## R2m R2c
## [1,] 0.1023364 0.107177
#cor(fitted(fit.mix),fit.mix@frame$as.numeric(avgdrnk)^2)
#Tests of coefficitns
display(fit.mix, detail=T, digits=4)
## lmer(formula = as.numeric(avgdrnk) ~ male + agez + lths + coll +
## black + hispanic + other + (1 | cofips), data = merged)
## coef.est coef.se t value
## (Intercept) 2.0938 0.0172 121.9522
## male 0.7090 0.0113 63.0180
## agez -0.4408 0.0062 -71.4467
## lths 0.4592 0.0316 14.5190
## coll -0.4941 0.0141 -35.0774
## black1 -0.1084 0.0232 -4.6736
## hispanic1 0.2706 0.0248 10.9019
## other1 0.1621 0.0248 6.5312
##
## Error terms:
## Groups Name Std.Dev.
## cofips (Intercept) 0.1376
## Residual 1.8692
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 456904, DIC = 456781.7
## deviance = 456833.1
#confidence intervals for the Odds ratio
exp(confint(fit.mix, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 7.8472700 8.3935836
## male 1.9876624 2.0772857
## agez 0.6357819 0.6513457
## lths 1.4876587 1.6840121
## coll 0.5935317 0.6272230
## black1 0.8574139 0.9390086
## hispanic1 1.2484952 1.3760699
## other1 1.1201151 1.2345438
The r-squared value explains around 11 percent variation, which is acceptable. We can see that, males show a positive association with average alcoholic drinks per day in past 30 days. With age, the consumption of average drinks decreases. With increasing education, average alcoholic drinks per day in past 30 days decrease as well.
fit.mix.mac<-lmer(as.numeric(avgdrnk)~ppov+pblack+phsormore+(1|cofips),data=merged)
r.squaredGLMM(fit.mix.mac)
## R2m R2c
## [1,] 0.00224927 0.01046793
display(fit.mix.mac, detail=T, digits=4)
## lmer(formula = as.numeric(avgdrnk) ~ ppov + pblack + phsormore +
## (1 | cofips), data = merged)
## coef.est coef.se t value
## (Intercept) 2.2584 0.4424 5.1049
## ppov 2.3662 0.5889 4.0180
## pblack -0.1802 0.1248 -1.4441
## phsormore -0.3629 0.4542 -0.7991
##
## Error terms:
## Groups Name Std.Dev.
## cofips (Intercept) 0.1795
## Residual 1.9699
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 468637, DIC = 468606.7
## deviance = 468615.7
#confidence intervals for the Odds ratio
exp(confint(fit.mix.mac, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 4.0202361 22.772518
## ppov 3.3601747 33.799470
## pblack 0.6539597 1.066474
## phsormore 0.2856069 1.694296
So, in counties that have higher poverty rates, the chance of average alcoholic drinks per day in past 30 days is higher.
fit.mix.ml<-lmer(as.numeric(avgdrnk)~male+agez+lths+coll+black+hispanic+other+ppov+pblack+phsormore+(1|cofips), data=merged)
r.squaredGLMM(fit.mix.ml)
## R2m R2c
## [1,] 0.103551 0.1077953
display(fit.mix.ml, detail=T, digits=4)
## lmer(formula = as.numeric(avgdrnk) ~ male + agez + lths + coll +
## black + hispanic + other + ppov + pblack + phsormore + (1 |
## cofips), data = merged)
## coef.est coef.se t value
## (Intercept) 1.6980 0.3440 4.9363
## male 0.7090 0.0113 63.0230
## agez -0.4408 0.0062 -71.4673
## lths 0.4568 0.0316 14.4439
## coll -0.4941 0.0141 -35.0850
## black1 -0.1183 0.0236 -5.0097
## hispanic1 0.2608 0.0250 10.4463
## other1 0.1614 0.0248 6.5117
## ppov 1.6985 0.4566 3.7200
## pblack -0.0082 0.0988 -0.0828
## phsormore 0.2757 0.3533 0.7802
##
## Error terms:
## Groups Name Std.Dev.
## cofips (Intercept) 0.1289
## Residual 1.8692
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 456888, DIC = 456751.4
## deviance = 456806.6
#confidence intervals for the Odds ratio
exp(confint(fit.mix.ml, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 2.7837823 10.7210917
## male 1.9877000 2.0773201
## agez 0.6357660 0.6513259
## lths 1.4841560 1.6800554
## coll 0.5934802 0.6271672
## black1 0.8482375 0.9305104
## hispanic1 1.2360195 1.3631157
## other1 1.1194597 1.2337146
## ppov 2.2335883 13.3748353
## pblack 0.8172777 1.2037310
## phsormore 0.6591199 2.6331900
The individual effects remains the same.
These results show, that net of individual level variables, the county level predictors do not tell us much about average alcoholic drinks per day in past 30 days.
#Model 1 to Model 3
anova(fit.mix, fit.mix.ml)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit.mix: as.numeric(avgdrnk) ~ male + agez + lths + coll + black + hispanic +
## fit.mix: other + (1 | cofips)
## fit.mix.ml: as.numeric(avgdrnk) ~ male + agez + lths + coll + black + hispanic +
## fit.mix.ml: other + ppov + pblack + phsormore + (1 | cofips)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.mix 10 456853 456949 -228417 456833
## fit.mix.ml 13 456833 456958 -228403 456807 26.435 3 7.731e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The second model with county level predictors seem to be significant. However, there is not much justification in including the higher level predictors.
A hypothesis from the findings above:
H1: Racial/ethnic minorities living in High poverty areas face added risk of having more than average alcoholic drinks per day in past 30 days.
fit.mix.cl<-lmer(as.numeric(avgdrnk)~male+agez+lths+coll+(black+hispanic+other)*ppov+ (1|cofips), data=merged)
display(fit.mix.cl, detail=T)
## lmer(formula = as.numeric(avgdrnk) ~ male + agez + lths + coll +
## (black + hispanic + other) * ppov + (1 | cofips), data = merged)
## coef.est coef.se t value
## (Intercept) 1.99 0.03 63.47
## male 0.71 0.01 63.07
## agez -0.44 0.01 -71.52
## lths 0.45 0.03 14.37
## coll -0.49 0.01 -35.04
## black1 -0.26 0.06 -4.06
## hispanic1 0.13 0.06 2.14
## other1 0.08 0.06 1.28
## ppov 1.07 0.30 3.62
## black1:ppov 1.34 0.55 2.44
## hispanic1:ppov 1.23 0.55 2.25
## other1:ppov 0.88 0.66 1.33
##
## Error terms:
## Groups Name Std.Dev.
## cofips (Intercept) 0.13
## Residual 1.87
## ---
## number of obs: 111662, groups: cofips, 209
## AIC = 456874, DIC = 456746.5
## deviance = 456796.5
exp(confint(fit.mix.cl, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 6.9103125 7.8162075
## male 1.9888634 2.0785449
## agez 0.6355690 0.6511239
## lths 1.4806222 1.6760992
## coll 0.5939410 0.6276481
## black1 0.6765550 0.8726949
## hispanic1 1.0111039 1.2908974
## other1 0.9571926 1.2312190
## ppov 1.6340323 5.2282596
## black1:ppov 1.3043798 11.1872326
## hispanic1:ppov 1.1749199 10.0546896
## other1:ppov 0.6613498 8.8286392
It shows that blacks and hispanics are in advantageous position in terms of average alcoholic drinks per day in past 30 days in counties that have higher poverty rates. There is evidence that the interactions are significant, however, the confidence intervals are noticeably wide.
#Model 3 to Model 4
anova(fit.mix.ml, fit.mix.cl)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit.mix.ml: as.numeric(avgdrnk) ~ male + agez + lths + coll + black + hispanic +
## fit.mix.ml: other + ppov + pblack + phsormore + (1 | cofips)
## fit.mix.cl: as.numeric(avgdrnk) ~ male + agez + lths + coll + (black + hispanic +
## fit.mix.cl: other) * ppov + (1 | cofips)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.mix.ml 13 456833 456958 -228403 456807
## fit.mix.cl 14 456824 456959 -228398 456796 10.179 1 0.001421 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The cross-level interaction model is seen to be significant.
Using the highest, an average and the lowest intercept counties to illustrate the differences.
which.max(ranef(fit.mix.cl)$cofips$`(Intercept)`)
## [1] 39
which(ranef(fit.mix.cl)$cofips$`(Intercept)`> -.01 &ranef(fit.mix.cl)$cofips$`(Intercept)` <.01)
## [1] 4 50 66 67 82 100 109 125 139 148 174 176 186
which.min(ranef(fit.mix.cl)$cofips$`(Intercept)`)
## [1] 205
rownames(ranef(fit.mix.cl)$cofips)[c(39, 4, 205)]
## [1] "15003" "02020" "53077"
merged$NAME[15003]
## [1] "New Haven County, Connecticut"
merged$NAME[02020]
## [1] "Maricopa County, Arizona"
merged$NAME[53077]
## [1] "Hennepin County, Minnesota"
#Get my different types of people from the fixed effects
newdat1<-expand.grid(male = c(0,1),agez=0, lths=0, coll=0,
black=levels(merged$black),
hispanic=levels(merged$hispanic),
other=levels(merged$other))
#remove some nonsensicals, people who are multiple ethnicities, etc.
newdat1<-newdat1[c( -7:-8, -15:-16),]
#I need 3 counties worth of people
newdat1<-rbind(newdat1, newdat1, newdat1)
#Put in the county identifier
newdat1$cofips<-c(rep("15003", 12), rep("02020", 12), rep("53077", 12))
#get the poverty z score for these 3 counties
povz <- joindata[joindata$GEOID %in%c("15003", "020202", "53077") , "ppov"]
newdat1$ppov<-c(rep(povz[1], 12), rep(povz[2], 12), rep(povz[3], 12))
#Here's our three counties of example cases:
newdat1
## male agez lths coll black hispanic other cofips ppov
## 1 0 0 0 0 0 0 0 15003 -0.7673104
## 2 1 0 0 0 0 0 0 15003 -0.7673104
## 3 0 0 0 0 1 0 0 15003 -0.7673104
## 4 1 0 0 0 1 0 0 15003 -0.7673104
## 5 0 0 0 0 0 1 0 15003 -0.7673104
## 6 1 0 0 0 0 1 0 15003 -0.7673104
## 9 0 0 0 0 0 0 1 15003 -0.7673104
## 10 1 0 0 0 0 0 1 15003 -0.7673104
## 11 0 0 0 0 1 0 1 15003 -0.7673104
## 12 1 0 0 0 1 0 1 15003 -0.7673104
## 13 0 0 0 0 0 1 1 15003 -0.7673104
## 14 1 0 0 0 0 1 1 15003 -0.7673104
## 15 0 0 0 0 0 0 0 02020 0.5020819
## 21 1 0 0 0 0 0 0 02020 0.5020819
## 31 0 0 0 0 1 0 0 02020 0.5020819
## 41 1 0 0 0 1 0 0 02020 0.5020819
## 51 0 0 0 0 0 1 0 02020 0.5020819
## 61 1 0 0 0 0 1 0 02020 0.5020819
## 91 0 0 0 0 0 0 1 02020 0.5020819
## 101 1 0 0 0 0 0 1 02020 0.5020819
## 111 0 0 0 0 1 0 1 02020 0.5020819
## 121 1 0 0 0 1 0 1 02020 0.5020819
## 131 0 0 0 0 0 1 1 02020 0.5020819
## 141 1 0 0 0 0 1 1 02020 0.5020819
## 16 0 0 0 0 0 0 0 53077 NA
## 22 1 0 0 0 0 0 0 53077 NA
## 32 0 0 0 0 1 0 0 53077 NA
## 42 1 0 0 0 1 0 0 53077 NA
## 52 0 0 0 0 0 1 0 53077 NA
## 62 1 0 0 0 0 1 0 53077 NA
## 92 0 0 0 0 0 0 1 53077 NA
## 102 1 0 0 0 0 0 1 53077 NA
## 112 0 0 0 0 1 0 1 53077 NA
## 122 1 0 0 0 1 0 1 53077 NA
## 132 0 0 0 0 0 1 1 53077 NA
## 142 1 0 0 0 0 1 1 53077 NA
newdat1$fitted<-predict(fit.mix.cl,newdata=newdat1, type="response")
newdat1
## male agez lths coll black hispanic other cofips ppov fitted
## 1 0 0 0 0 0 0 0 15003 -0.7673104 1.6852040
## 2 1 0 0 0 0 0 0 15003 -0.7673104 2.3948197
## 3 0 0 0 0 1 0 0 15003 -0.7673104 0.3933605
## 4 1 0 0 0 1 0 0 15003 -0.7673104 1.1029762
## 5 0 0 0 0 0 1 0 15003 -0.7673104 0.8710577
## 6 1 0 0 0 0 1 0 15003 -0.7673104 1.5806734
## 9 0 0 0 0 0 0 1 15003 -0.7673104 1.0903605
## 10 1 0 0 0 0 0 1 15003 -0.7673104 1.7999762
## 11 0 0 0 0 1 0 1 15003 -0.7673104 -0.2014830
## 12 1 0 0 0 1 0 1 15003 -0.7673104 0.5081327
## 13 0 0 0 0 0 1 1 15003 -0.7673104 0.2762142
## 14 1 0 0 0 0 1 1 15003 -0.7673104 0.9858299
## 15 0 0 0 0 0 0 0 02020 0.5020819 2.5362932
## 21 1 0 0 0 0 0 0 02020 0.5020819 3.2459088
## 31 0 0 0 0 1 0 0 02020 0.5020819 2.9457533
## 41 1 0 0 0 1 0 0 02020 0.5020819 3.6553689
## 51 0 0 0 0 0 1 0 02020 0.5020819 3.2893635
## 61 1 0 0 0 0 1 0 02020 0.5020819 3.9989791
## 91 0 0 0 0 0 0 1 02020 0.5020819 3.0613891
## 101 1 0 0 0 0 0 1 02020 0.5020819 3.7710048
## 111 0 0 0 0 1 0 1 02020 0.5020819 3.4708492
## 121 1 0 0 0 1 0 1 02020 0.5020819 4.1804649
## 131 0 0 0 0 0 1 1 02020 0.5020819 3.8144594
## 141 1 0 0 0 0 1 1 02020 0.5020819 4.5240751
## 16 0 0 0 0 0 0 0 53077 NA NA
## 22 1 0 0 0 0 0 0 53077 NA NA
## 32 0 0 0 0 1 0 0 53077 NA NA
## 42 1 0 0 0 1 0 0 53077 NA NA
## 52 0 0 0 0 0 1 0 53077 NA NA
## 62 1 0 0 0 0 1 0 53077 NA NA
## 92 0 0 0 0 0 0 1 53077 NA NA
## 102 1 0 0 0 0 0 1 53077 NA NA
## 112 0 0 0 0 1 0 1 53077 NA NA
## 122 1 0 0 0 1 0 1 53077 NA NA
## 132 0 0 0 0 0 1 1 53077 NA NA
## 142 1 0 0 0 0 1 1 53077 NA NA