#load brfss
library(car)
library(stargazer)
library(survey)
library(sjPlot)
library(ggplot2)
library(pander)
library(knitr)
library(tidycensus)
library(dplyr)
load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
set.seed(12345)
#samps<-sample(1:nrow(brfss_17), size = 40000, replace=F)
#brfss_17<-brfss_17[samps,]
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss_17)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",
replacement = "",
x = nams)
names(brfss_17)<-tolower(newnames)
#sex
brfss_17$male<-ifelse(brfss_17$sex==1, 1, 0)
#BMI
brfss_17$bmi<-ifelse(is.na(brfss_17$bmi5)==T, NA, brfss_17$bmi5/100)
#Healthy days
brfss_17$healthdays<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")
#Healthy mental health days
brfss_17$healthmdays<-Recode(brfss_17$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
brfss_17$race_eth<-Recode(brfss_17$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")
#insurance
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#income grouping
brfss_17$inc<-Recode(brfss_17$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor = T)
brfss_17$inc<-as.ordered(brfss_17$inc)
#education level
brfss_17$educ<-Recode(brfss_17$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')
#employment
brfss_17$employ<-Recode(brfss_17$employ1,
recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='employloyed')
#marital status
brfss_17$marst<-Recode(brfss_17$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst,
ref='married')
#Age cut into intervals
brfss_17$agec<-cut(brfss_17$age80,
breaks=c(0,24,39,59,79,99))
#BMI, in the brfss_17a the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's
brfss_17$bmi<-brfss_17$bmi5/100
#smoking currently
brfss_17$smoke<-Recode(brfss_17$smoker3,
recodes="1:2=1; 3:4=0; else=NA")
#brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")
brfss_17$obese<-ifelse(is.na(brfss_17$bmi)==T, NA,
ifelse(brfss_17$bmi>30,1,0))
usacs<-get_acs(geography = "metropolitan statistical area/micropolitan statistical area", year = 2011,
variables=c( "DP05_0001E",
"DP03_0062E",
"DP04_0003PE") ,
summary_var = "B01001_001",
geometry = F,
output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
usacs<-usacs%>%
mutate(totpop= DP05_0001E,
medhhinc=DP03_0062E,
pvacant=DP04_0003PE/100)%>%
dplyr::select(GEOID,NAME, totpop, medhhinc, pvacant)
head(usacs)
myscale<-function(x){as.numeric(scale(x))}
usacs<-usacs %>%
mutate_at(c( "medhhinc", "pvacant"),myscale)
merged<-usacs%>%
filter(complete.cases(.) )
head(merged)
Now, I merge the data back to the individual level data:
merged<-merge(x=brfss_17,
y=merged,
by.x="mmsa",
by.y="GEOID",
all.x=F)
Merge the MSA data to the BRFSS data
merged$weightz<-as.numeric(scale(merged$weight2,
center=T,
scale=T))
merged<-merged%>%
dplyr::select(weightz,weight2, obese, mmsa, agec, educ, race_eth,smoke, healthmdays, badhealth,bmi,medhhinc,pvacant, male, mmsawt, mmsaname )%>%
filter(complete.cases(.)& weight2 < 7776)
summary(merged)
## weightz weight2 obese mmsa
## Min. :-0.30060 Min. : 60 Min. :0.0000 Min. :10100
## 1st Qu.:-0.25877 1st Qu.:149 1st Qu.:0.0000 1st Qu.:19740
## Median :-0.24654 Median :175 Median :0.0000 Median :31740
## Mean :-0.24417 Mean :180 Mean :0.3057 Mean :30161
## 3rd Qu.:-0.23338 3rd Qu.:203 3rd Qu.:1.0000 3rd Qu.:39300
## Max. :-0.04675 Max. :600 Max. :1.0000 Max. :49340
## agec educ race_eth smoke
## (0,24] : 9817 2hsgrad :39696 nhwhite :123432 Min. :0.0000
## (24,39]:26671 0Prim : 2721 hispanic : 13511 1st Qu.:0.0000
## (39,59]:49736 1somehs : 6456 nh black : 13871 Median :0.0000
## (59,79]:60346 3somecol:45063 nh multirace: 2958 Mean :0.1415
## (79,99]:12573 4colgrad:65207 nh other : 5371 3rd Qu.:0.0000
## Max. :1.0000
## healthmdays badhealth bmi medhhinc
## Min. : 0.000 Min. :0.0000 Min. :12.05 Min. :-2.4443
## 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:23.78 1st Qu.: 0.3763
## Median : 0.000 Median :0.0000 Median :27.12 Median : 0.9584
## Mean : 3.616 Mean :0.1775 Mean :28.11 Mean : 0.8860
## 3rd Qu.: 2.000 3rd Qu.:0.0000 3rd Qu.:31.17 3rd Qu.: 1.4043
## Max. :30.000 Max. :1.0000 Max. :99.31 Max. : 2.9512
## pvacant male mmsawt mmsaname
## Min. :-1.2577 Min. :0.0000 Min. : 0.15 Length:159143
## 1st Qu.:-0.8430 1st Qu.:0.0000 1st Qu.: 105.13 Class :character
## Median :-0.6141 Median :0.0000 Median : 256.41 Mode :character
## Mean :-0.4397 Mean :0.4549 Mean : 570.36
## 3rd Qu.:-0.2566 3rd Qu.:1.0000 3rd Qu.: 611.85
## Max. : 3.5045 Max. :1.0000 Max. :35483.51
head(merged[, c("weight2","race_eth","smoke" ,"male", "agec", "educ","medhhinc", "pvacant", "mmsa")])
meanweight<-mean(merged$weightz, na.rm=T)
sdweight<-sd(merged$weightz, na.rm=T)
Now we fit the hierarchical model
library(lme4)
library(lmerTest)
library(arm)
fit<-lmer(weightz~(1|mmsa),
data=merged)
arm::display(fit,
detail=T)
## lmer(formula = weightz ~ (1 | mmsa), data = merged)
## coef.est coef.se t value
## (Intercept) -0.24 0.00 -1358.95
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.00
## Residual 0.02
## ---
## number of obs: 159143, groups: mmsa, 118
## AIC = -773717, DIC = -773754.2
## deviance = -773738.7
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weightz ~ (1 | mmsa)
## Data: merged
##
## REML criterion at convergence: -773723.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7052 -0.7148 -0.1220 0.5294 9.3084
##
## Random effects:
## Groups Name Variance Std.Dev.
## mmsa (Intercept) 3.288e-06 0.001813
## Residual 4.522e-04 0.021265
## Number of obs: 159143, groups: mmsa, 118
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -2.439e-01 1.795e-04 1.175e+02 -1359 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(sjstats); library(sjPlot)
##
## Attaching package: 'sjstats'
## The following object is masked from 'package:survey':
##
## cv
icc(fit)
## Warning: 'icc' is deprecated.
## Use 'performance::icc()' instead.
## See help("Deprecated")
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.007
## Conditional ICC: 0.007
plot_model(fit,
type = "re",
sort.est="sort.all",
grid=F)
## Warning: `select_vars()` is deprecated as of dplyr 0.8.4.
## Please use `tidyselect::vars_select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Less than 1% of the variance in weight is due to different MSAs
merged$predweight<-sdweight*(fitted(fit))+meanweight
head(ranef(fit)$mmsa)
dim(ranef(fit)$mmsa)
## [1] 118 1
rate<- sdweight*(fixef(fit)+ranef(fit)$mmsa)+meanweight
est<-data.frame(rate =rate, id=rownames(ranef(fit)$mmsa))
head(est)
fit2<-lmer(weightz~male+race_eth+smoke+educ+(1|mmsa),
data=merged,
na.action=na.omit)
arm::display(fit2, detail=T)
## lmer(formula = weightz ~ male + race_eth + smoke + educ + (1 |
## mmsa), data = merged, na.action = na.omit)
## coef.est coef.se t value
## (Intercept) -0.25 0.00 -1288.11
## male 0.02 0.00 170.83
## race_ethhispanic 0.00 0.00 -8.14
## race_ethnh black 0.01 0.00 29.77
## race_ethnh multirace 0.00 0.00 7.84
## race_ethnh other -0.01 0.00 -20.43
## smoke 0.00 0.00 -18.11
## educ0Prim 0.00 0.00 -6.23
## educ1somehs 0.00 0.00 -2.36
## educ3somecol 0.00 0.00 7.88
## educ4colgrad 0.00 0.00 -9.56
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.00
## Residual 0.02
## ---
## number of obs: 159143, groups: mmsa, 118
## AIC = -801553, DIC = -801916.1
## deviance = -801747.7
summary(fit2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: weightz ~ male + race_eth + smoke + educ + (1 | mmsa)
## Data: merged
##
## REML criterion at convergence: -801579.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3286 -0.6823 -0.1648 0.5015 10.5654
##
## Random effects:
## Groups Name Variance Std.Dev.
## mmsa (Intercept) 2.763e-06 0.001662
## Residual 3.793e-04 0.019474
## Number of obs: 159143, groups: mmsa, 118
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -2.511e-01 1.950e-04 2.277e+02 -1288.114 < 2e-16 ***
## male 1.681e-02 9.842e-05 1.591e+05 170.835 < 2e-16 ***
## race_ethhispanic -1.673e-03 2.056e-04 9.187e+04 -8.136 4.15e-16 ***
## race_ethnh black 5.449e-03 1.830e-04 1.413e+05 29.770 < 2e-16 ***
## race_ethnh multirace 2.882e-03 3.675e-04 1.579e+05 7.842 4.47e-15 ***
## race_ethnh other -5.596e-03 2.739e-04 1.588e+05 -20.434 < 2e-16 ***
## smoke -2.603e-03 1.437e-04 1.591e+05 -18.114 < 2e-16 ***
## educ0Prim -2.449e-03 3.929e-04 1.591e+05 -6.233 4.59e-10 ***
## educ1somehs -6.218e-04 2.632e-04 1.591e+05 -2.362 0.0182 *
## educ3somecol 1.061e-03 1.346e-04 1.591e+05 7.880 3.29e-15 ***
## educ4colgrad -1.214e-03 1.271e-04 1.590e+05 -9.555 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) male rc_thh rc_thb rc_thm rc_tho smoke edc0Pr edc1sm
## male -0.229
## rac_thhspnc -0.114 -0.005
## rc_thnhblck -0.122 0.032 0.098
## rc_thnhmltr -0.041 -0.008 0.049 0.051
## rc_thnhothr -0.049 -0.023 0.072 0.065 0.049
## smoke -0.147 -0.034 0.021 -0.007 -0.034 -0.016
## educ0Prim -0.109 0.000 -0.160 -0.008 -0.007 -0.015 0.001
## educ1somehs -0.166 0.002 -0.076 -0.042 -0.008 -0.016 -0.063 0.108
## educ3somecl -0.382 0.026 0.036 0.016 -0.010 0.006 0.044 0.175 0.263
## educ4colgrd -0.419 -0.009 0.083 0.068 0.012 -0.006 0.162 0.178 0.265
## edc3sm
## male
## rac_thhspnc
## rc_thnhblck
## rc_thnhmltr
## rc_thnhothr
## smoke
## educ0Prim
## educ1somehs
## educ3somecl
## educ4colgrd 0.573
Report the variance components of the model and describe the results of the model According to the t-values, all variables but somehs are significant in explaining weight.However, p-values suggest somehs is significant at >.95 Results from different summary methods differ. For the method used in the class notes, race_ethnh other is the only group with a negative relationship with weight with respect to the reference group.
Report the results for the fixed effects
fit.an<-lm(weightz~as.factor(mmsa),
merged)
anova(fit.an)
We see significant variation in our outcomes across the higher level units.
anova(fit,fit2)
## refitting model(s) with ML (instead of REML)