For the following assignment, I am using BRFSS 2016 data. The binary outcome variable for this assignment is, health coverage, whether the respondent has any kind of health care coverage, including health insurance, prepaid plans such as HMOs, or government plans such as Medicare, or Indian Health Service?

The grouping variable is Metropolitan tatisticsl Areas. For including this group level variable, we will need to merge American Community Survey data to BRFSS 2016 . The individual level variables are, education and race and ethnicity.

The research question that is central to this analysis is: Is there variation in health coverage by education and race/ethnicty for people across metropolitan statistical areas.

Loading Libraries

library(car)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(sjPlot)

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

library(ggplot2)
library(pander)
library(knitr)
library(lme4)

Loading BRFSS Data

load("C:/Users/munta/Google Drive/BRFSS16/brfss16_mmsa.Rdata")
set.seed(12345)
#Cleaning up the names
nams<-names(brfss16m)
#Fixing the capital/lower case
newnames<-gsub(pattern = "_",replacement =  "",x =  nams)
names(brfss16m)<-tolower(newnames)
nams<-names(brfss16m)

Recoding Variables

#sex
brfss16m$male<-ifelse(brfss16m$sex==1, 1, 0)

#BMI
brfss16m$bmi<-ifelse(is.na(brfss16m$bmi5)==T, NA, brfss16m$bmi5/100)

#Healthy days
brfss16m$healthdays<-Recode(brfss16m$physhlth, recodes = "88=0; 77=NA; 99=NA")

#Healthy mental health days
brfss16m$healthmdays<-Recode(brfss16m$menthlth, recodes = "88=0; 77=NA; 99=NA")

brfss16m$badhealth<-Recode(brfss16m$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss16m$black<-Recode(brfss16m$racegr3, recodes="2=1; 9=NA; else=0")
brfss16m$white<-Recode(brfss16m$racegr3, recodes="1=1; 9=NA; else=0")
brfss16m$other<-Recode(brfss16m$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss16m$hispanic<-Recode(brfss16m$racegr3, recodes="5=1; 9=NA; else=0")

brfss16m$race_eth<-Recode(brfss16m$racegr3, 
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor.result = T)
brfss16m$race_eth<-relevel(brfss16m$race_eth, ref = "nhwhite")

#insurance
brfss16m$ins<-Recode(brfss16m$hlthpln1, recodes ="7:9=NA; 1=1;2=0")

#income grouping
brfss16m$inc<-Recode(brfss16m$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor.result = T)
brfss16m$inc<-as.ordered(brfss16m$inc)
#education level
brfss16m$educ<-Recode(brfss16m$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor.result=T)
brfss16m$educ<-relevel(brfss16m$educ, ref='2hsgrad')

#employloyment
brfss16m$employ<-Recode(brfss16m$employ1,
recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor.result=T)
brfss16m$employ<-relevel(brfss16m$employ, ref='employloyed')

#marital status
brfss16m$marst<-Recode(brfss16m$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor.result=T)
brfss16m$marst<-relevel(brfss16m$marst, ref='married')

#Age cut into intervals
brfss16m$agec<-cut(brfss16m$age80, breaks=c(0,24,39,59,79,99))

#BMI, in the brfss16ma the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's

brfss16m$bmi<-brfss16m$bmi5/100

#smoking currently
brfss16m$smoke<-Recode(brfss16m$smoker3, 
recodes="1:2=1; 3:4=0; else=NA")
#brfss16m$smoke<-relevel(brfss16m$smoke, ref = "NeverSmoked")

brfss16m$obese<-ifelse(is.na(brfss16m$bmi)==T, NA, 
                       ifelse(brfss16m$bmi>30,1,0))

# #Outcome Variable: How old were you when you were told you have diabetes?
# brfss16m$agediab<-recode(brfss16m$diabage2, recodes = "98=NA; 99=NA")
# summary(brfss16m$agediab)
# hist(brfss16m$agediab)

#Outcome Variable:Do you have any kind of health care coverage, including health insurance, prepaid plans such as HMOs, or government plans such as Medicare, or Indian Health Service?
brfss16m$hltcov<-recode(brfss16m$hlthpln1, recodes="1=1; 2=0; else=NA")
table(brfss16m$hltcov)

## 
##      0      1 
##  16957 231053

Getting ACS data

library(acs)
#Get 2010 ACS median household incomes for tracts in Texas
msaacs<-geo.make(msa="*")

#ACS tables B17 is poverty, B19 is Gini, B25001 is housing vacancy, B25035 is median year built 
acsecon<-acs.fetch(key=mykey, endyear=2010, span=5, geography=msaacs, variable = c("B19083_001","B17001_001","B17001_002" , "B25002_001","B25002_003", "B25035_001"))

colnames(acsecon@estimate)

## [1] "B19083_001" "B17001_001" "B17001_002" "B25002_001" "B25002_003"
## [6] "B25035_001"

msaecon<-data.frame(gini=acsecon@estimate[, "B19083_001"], 
ppoverty=acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"],
giniz=scale(acsecon@estimate[, "B19083_001"]),
pvacant=acsecon@estimate[,"B25002_003"]/acsecon@estimate[, "B25002_001"],
ppovertyz=scale(acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"]), 
pvacantz=scale(acsecon@estimate[,"B25002_003"]/acsecon@estimate[, "B25002_001"]), 
medhouse=acsecon@estimate[, "B25035_001" ],
medhousez=scale(acsecon@estimate[, "B25035_001" ]))

msaecon$ids<-paste(acsecon@geography$metropolitanstatisticalareamicropolitanstatisticalarea)

library(tigris)

## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.

## 
## Attaching package: 'tigris'

## The following object is masked from 'package:graphics':
## 
##     plot

msa<-core_based_statistical_areas(cb=T)
msa_ec<-geo_join(msa, msaecon, "CBSAFP", "ids", how="inner")

Merging BRFSS and ACS

msa_ec$state<-str_sub(msa_ec$NAME, -2,-1)
msaecon<-merge(msaecon, msa_ec@data[, c(1:7,18)], by.x="ids", by.y="CBSAFP", all.x=T)

joindata<-merge(brfss16m, msaecon, by.x="mmsa",by.y="ids", all.x=T)
joindata$bmiz<-as.numeric(scale(joindata$bmi, center=T, scale=T))
#joindata<-joindata[complete.cases(joindata[, c("bmiz", "race_eth", "agec", "educ", "gini")]),]
#and merge the data back to the kids data
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:acs':
## 
##     combine

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

joindata<-joindata%>%
  select(bmiz, obese, hltcov, mmsa, agec, educ, race_eth,smoke, healthmdays, badhealth,bmi,gini, ppoverty, ppovertyz,giniz,medhouse,medhousez, pvacant, pvacantz, male, mmsawt, mmsaname, state )%>%
  filter(complete.cases(.))


head(joindata[, c("hltcov", "agec", "educ", "gini", "ppoverty" , "mmsa", "state")])

##   hltcov    agec     educ  gini  ppoverty  mmsa state
## 1      1 (79,99] 3somecol 0.427 0.1053253 10580    NY
## 2      1 (59,79]  2hsgrad 0.427 0.1053253 10580    NY
## 3      1 (79,99] 4colgrad 0.427 0.1053253 10580    NY
## 4      1 (59,79]  2hsgrad 0.427 0.1053253 10580    NY
## 5      1 (79,99] 3somecol 0.427 0.1053253 10580    NY
## 6      1 (39,59]  1somehs 0.427 0.1053253 10580    NY

meanbmi<-mean(joindata$bmi, na.rm=T)
sdbmi<-sd(joindata$bmi, na.rm=T)

Fitting multilevel model

Individuals who are racial/ethnic minorities, and who live in areas with high poverty rates are at higher risk of not having health coverage.

#MSA level variables
fit.mix1<-lmer(hltcov~educ+race_eth+ ppovertyz+giniz+(1|mmsa), data=joindata)
arm::display(fit.mix1, detail=T)

## lmer(formula = hltcov ~ educ + race_eth + ppovertyz + giniz + 
##     (1 | mmsa), data = joindata)
##                      coef.est coef.se t value
## (Intercept)            0.93     0.00  252.23 
## educ0Prim             -0.10     0.00  -21.87 
## educ1somehs           -0.06     0.00  -19.81 
## educ3somecol           0.02     0.00   15.77 
## educ4colgrad           0.06     0.00   38.52 
## race_ethhispanic      -0.15     0.00  -59.49 
## race_ethnh black      -0.04     0.00  -18.61 
## race_ethnh multirace  -0.04     0.00   -8.04 
## race_ethnh other      -0.04     0.00  -12.00 
## ppovertyz              0.02     0.01    3.78 
## giniz                 -0.01     0.00   -1.74 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.02    
##  Residual             0.24    
## ---
## number of obs: 171032, groups: mmsa, 123
## AIC = -3241.5, DIC = -3486.3
## deviance = -3376.9

It shows that, individuals with different race/ethnic background than non-Hispanic whites, and who are poor have a higher risk of not having health coverage.

Cross-level interaction model

This involves forming the interaction term between an individual level variable and a higher level variable. In this case, we will interact race/ethnicity with poverty at the MSAs. We can state this kind of hypothesis as “Ethinic minority groups, living in poverty in different MSAs will have no health coverage”.

fit.mix2<-lmer(hltcov~educ+race_eth+ppovertyz+giniz+ppoverty*race_eth+(1|mmsa), joindata, REML=F)

## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient

arm::display(fit.mix2, detail=T)

## lmer(formula = hltcov ~ educ + race_eth + ppovertyz + giniz + 
##     ppoverty * race_eth + (1 | mmsa), data = joindata, REML = F)
##                               coef.est coef.se t value
## (Intercept)                     0.93     0.00  256.94 
## educ0Prim                      -0.10     0.00  -21.86 
## educ1somehs                    -0.06     0.00  -19.78 
## educ3somecol                    0.02     0.00   15.77 
## educ4colgrad                    0.06     0.00   38.50 
## race_ethhispanic               -0.19     0.01  -22.10 
## race_ethnh black               -0.04     0.01   -3.73 
## race_ethnh multirace           -0.01     0.02   -0.35 
## race_ethnh other               -0.06     0.02   -3.67 
## ppovertyz                       0.01     0.01    1.38 
## giniz                          -0.01     0.00   -1.28 
## race_ethhispanic:ppoverty       0.32     0.06    5.09 
## race_ethnh black:ppoverty      -0.02     0.07   -0.29 
## race_ethnh multirace:ppoverty  -0.23     0.19   -1.19 
## race_ethnh other:ppoverty       0.13     0.12    1.10 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.02    
##  Residual             0.24    
## ---
## number of obs: 171032, groups: mmsa, 123
## AIC = -3369, DIC = -3403
## deviance = -3403.0

We can see that blacks in MSAs living in poverty, will have lower chances of having health care coverage.

Compare the models with a LRT

anova(fit.mix1, fit.mix2)

## refitting model(s) with ML (instead of REML)

## Data: joindata
## Models:
## fit.mix1: hltcov ~ educ + race_eth + ppovertyz + giniz + (1 | mmsa)
## fit.mix2: hltcov ~ educ + race_eth + ppovertyz + giniz + ppoverty * race_eth + 
## fit.mix2:     (1 | mmsa)
##          Df     AIC     BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## fit.mix1 13 -3350.9 -3220.2 1688.4  -3376.9                             
## fit.mix2 17 -3369.0 -3198.2 1701.5  -3403.0 26.178      4  2.913e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test if a generalized linear mixed model (GLMM) is justified

anova(glm(hltcov~mmsa, joindata, family=binomial), test="Chisq")

## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: hltcov
## 
## Terms added sequentially (first to last)
## 
## 
##      Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                171031      82397              
## mmsa  1   26.759    171030      82370 2.305e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The global likelihood ratio informs us that there is variance between my grouping variable levels, which means we can go ahead and do the generalized linear mixed models.

Generalized Linear Mixed Models

Multilevel Logistic Model with Individual Level Predictors

joindata$mmsa_fac<-as.factor(joindata$mmsa)
fit.mix.bin<-glmer(hltcov~educ+race_eth+(1|mmsa_fac), family=binomial, joindata,
                   control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))

fit.mix.bin<-refit(fit.mix.bin)
arm::display(fit.mix.bin, detail=T)

## glmer(formula = hltcov ~ educ + race_eth + (1 | mmsa_fac), data = joindata, 
##     family = binomial, control = glmerControl(optimizer = "bobyqa", 
##         optCtrl = list(maxfun = 2e+05)))
##                      coef.est coef.se z value Pr(>|z|)
## (Intercept)            2.60     0.04   68.08    0.00  
## educ0Prim             -0.62     0.05  -13.18    0.00  
## educ1somehs           -0.50     0.04  -13.84    0.00  
## educ3somecol           0.36     0.03   14.11    0.00  
## educ4colgrad           1.18     0.03   40.50    0.00  
## race_ethhispanic      -1.46     0.03  -48.85    0.00  
## race_ethnh black      -0.63     0.03  -19.15    0.00  
## race_ethnh multirace  -0.62     0.07   -8.89    0.00  
## race_ethnh other      -0.70     0.05  -13.49    0.00  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa_fac (Intercept) 0.36    
##  Residual             1.00    
## ---
## number of obs: 171032, groups: mmsa_fac, 123
## AIC = 74523.6, DIC = 73743
## deviance = 74123.3

#odds ratios
exp(fixef(fit.mix.bin)[-1])

##            educ0Prim          educ1somehs         educ3somecol 
##            0.5353796            0.6057856            1.4270454 
##         educ4colgrad     race_ethhispanic     race_ethnh black 
##            3.2472614            0.2311821            0.5343628 
## race_ethnh multirace     race_ethnh other 
##            0.5377257            0.4965547

exp(confint(fit.mix.bin, method="Wald"))

##                           2.5 %     97.5 %
## .sig01                       NA         NA
## (Intercept)          12.4369660 14.4414276
## educ0Prim             0.4878811  0.5875024
## educ1somehs           0.5642692  0.6503567
## educ3somecol          1.3582678  1.4993057
## educ4colgrad          3.0673412  3.4377352
## race_ethhispanic      0.2179901  0.2451724
## race_ethnh black      0.5011674  0.5697568
## race_ethnh multirace  0.4690040  0.6165168
## race_ethnh other      0.4485194  0.5497345

The odds ratios show that, people with college degree are more than 3 times more likely to have health coverage, whereas, individuals with primary education are 40% less likely to have coverage. Individuals who are not non-Hispanic whites, are around 50% less likely to have health coverage.

Multilevel logistic model with a higher level variable

fit.mix.bin2<-glmer(hltcov~educ+race_eth+ppoverty+giniz+(1|mmsa_fac), family=binomial, joindata,
                   control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0122992 (tol =
## 0.001, component 1)

fit.mix.bin2<-refit(fit.mix.bin2)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00433569 (tol =
## 0.001, component 1)

arm::display(fit.mix.bin2, detail=T)

## glmer(formula = hltcov ~ educ + race_eth + ppoverty + giniz + 
##     (1 | mmsa_fac), data = joindata, family = binomial, control = glmerControl(optimizer = "bobyqa", 
##     optCtrl = list(maxfun = 2e+05)))
##                      coef.est coef.se z value Pr(>|z|)
## (Intercept)            2.24     0.07   31.15    0.00  
## educ0Prim             -0.63     0.05  -13.17    0.00  
## educ1somehs           -0.50     0.04  -13.87    0.00  
## educ3somecol           0.36     0.03   14.13    0.00  
## educ4colgrad           1.18     0.03   40.53    0.00  
## race_ethhispanic      -1.47     0.03  -48.92    0.00  
## race_ethnh black      -0.63     0.03  -19.11    0.00  
## race_ethnh multirace  -0.62     0.07   -8.88    0.00  
## race_ethnh other      -0.70     0.05  -13.47    0.00  
## ppoverty               2.72     0.48    5.72    0.00  
## giniz                 -0.08     0.05   -1.66    0.10  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa_fac (Intercept) 0.35    
##  Residual             1.00    
## ---
## number of obs: 171032, groups: mmsa_fac, 123
## AIC = 74522.2, DIC = 73750.6
## deviance = 74124.4

#odds ratios
exp(fixef(fit.mix.bin2)[-1])

##            educ0Prim          educ1somehs         educ3somecol 
##            0.5349975            0.6056013            1.4270925 
##         educ4colgrad     race_ethhispanic     race_ethnh black 
##            3.2486495            0.2305806            0.5344457 
## race_ethnh multirace     race_ethnh other             ppoverty 
##            0.5377803            0.4969015           15.1652886 
##                giniz 
##            0.9195916

exp(confint(fit.mix.bin2, method="Wald"))

##                          2.5 %     97.5 %
## .sig01                      NA         NA
## (Intercept)          8.1529059 10.8061783
## educ0Prim            0.4874317  0.5872051
## educ1somehs          0.5641561  0.6500912
## educ3somecol         1.3583990  1.4992597
## educ4colgrad         3.0687332  3.4391141
## race_ethhispanic     0.2174185  0.2445394
## race_ethnh black     0.5011858  0.5699127
## race_ethnh multirace 0.4689449  0.6167199
## race_ethnh other     0.4488165  0.5501382
## ppoverty             5.9729373 38.5046696
## giniz                0.8328148  1.0154102

Including the group level predictors, does not seem to change the outcome that much.

LRT between models

anova(fit.mix.bin, fit.mix.bin2)

## Data: joindata
## Models:
## fit.mix.bin: hltcov ~ educ + race_eth + (1 | mmsa_fac)
## fit.mix.bin2: hltcov ~ educ + race_eth + ppoverty + giniz + (1 | mmsa_fac)
##              Df   AIC   BIC logLik deviance  Chisq Chi Df Pr(>Chisq)  
## fit.mix.bin  10 74524 74624 -37252    74504                           
## fit.mix.bin2 12 74522 74643 -37249    74498 5.3795      2     0.0679 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The likelihood ratio test shows that, the difference is not significant. Which means, the difference in the outcome of health coverage by rac/ethnicity, education, poverty across MSAs is not significant, and we can’t conclude whether, health coverage will vary by MSAs.

HW8

Muntasir

April 9, 2018