For the following assignment, I am using BRFSS 2016 data. The binary outcome variable for this assignment is, health coverage, whether the respondent has any kind of health care coverage, including health insurance, prepaid plans such as HMOs, or government plans such as Medicare, or Indian Health Service?
The grouping variable is Metropolitan tatisticsl Areas. For including this group level variable, we will need to merge American Community Survey data to BRFSS 2016 . The individual level variables are, education and race and ethnicity.
The research question that is central to this analysis is: Is there variation in health coverage by education and race/ethnicty for people across metropolitan statistical areas.
library(car)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(sjPlot)
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
library(ggplot2)
library(pander)
library(knitr)
library(lme4)
load("C:/Users/munta/Google Drive/BRFSS16/brfss16_mmsa.Rdata")
set.seed(12345)
#Cleaning up the names
nams<-names(brfss16m)
#Fixing the capital/lower case
newnames<-gsub(pattern = "_",replacement = "",x = nams)
names(brfss16m)<-tolower(newnames)
nams<-names(brfss16m)
#sex
brfss16m$male<-ifelse(brfss16m$sex==1, 1, 0)
#BMI
brfss16m$bmi<-ifelse(is.na(brfss16m$bmi5)==T, NA, brfss16m$bmi5/100)
#Healthy days
brfss16m$healthdays<-Recode(brfss16m$physhlth, recodes = "88=0; 77=NA; 99=NA")
#Healthy mental health days
brfss16m$healthmdays<-Recode(brfss16m$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss16m$badhealth<-Recode(brfss16m$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss16m$black<-Recode(brfss16m$racegr3, recodes="2=1; 9=NA; else=0")
brfss16m$white<-Recode(brfss16m$racegr3, recodes="1=1; 9=NA; else=0")
brfss16m$other<-Recode(brfss16m$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss16m$hispanic<-Recode(brfss16m$racegr3, recodes="5=1; 9=NA; else=0")
brfss16m$race_eth<-Recode(brfss16m$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor.result = T)
brfss16m$race_eth<-relevel(brfss16m$race_eth, ref = "nhwhite")
#insurance
brfss16m$ins<-Recode(brfss16m$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#income grouping
brfss16m$inc<-Recode(brfss16m$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor.result = T)
brfss16m$inc<-as.ordered(brfss16m$inc)
#education level
brfss16m$educ<-Recode(brfss16m$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor.result=T)
brfss16m$educ<-relevel(brfss16m$educ, ref='2hsgrad')
#employloyment
brfss16m$employ<-Recode(brfss16m$employ1,
recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor.result=T)
brfss16m$employ<-relevel(brfss16m$employ, ref='employloyed')
#marital status
brfss16m$marst<-Recode(brfss16m$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor.result=T)
brfss16m$marst<-relevel(brfss16m$marst, ref='married')
#Age cut into intervals
brfss16m$agec<-cut(brfss16m$age80, breaks=c(0,24,39,59,79,99))
#BMI, in the brfss16ma the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's
brfss16m$bmi<-brfss16m$bmi5/100
#smoking currently
brfss16m$smoke<-Recode(brfss16m$smoker3,
recodes="1:2=1; 3:4=0; else=NA")
#brfss16m$smoke<-relevel(brfss16m$smoke, ref = "NeverSmoked")
brfss16m$obese<-ifelse(is.na(brfss16m$bmi)==T, NA,
ifelse(brfss16m$bmi>30,1,0))
# #Outcome Variable: How old were you when you were told you have diabetes?
# brfss16m$agediab<-recode(brfss16m$diabage2, recodes = "98=NA; 99=NA")
# summary(brfss16m$agediab)
# hist(brfss16m$agediab)
#Outcome Variable:Do you have any kind of health care coverage, including health insurance, prepaid plans such as HMOs, or government plans such as Medicare, or Indian Health Service?
brfss16m$hltcov<-recode(brfss16m$hlthpln1, recodes="1=1; 2=0; else=NA")
table(brfss16m$hltcov)
##
## 0 1
## 16957 231053
library(acs)
#Get 2010 ACS median household incomes for tracts in Texas
msaacs<-geo.make(msa="*")
#ACS tables B17 is poverty, B19 is Gini, B25001 is housing vacancy, B25035 is median year built
acsecon<-acs.fetch(key=mykey, endyear=2010, span=5, geography=msaacs, variable = c("B19083_001","B17001_001","B17001_002" , "B25002_001","B25002_003", "B25035_001"))
colnames(acsecon@estimate)
## [1] "B19083_001" "B17001_001" "B17001_002" "B25002_001" "B25002_003"
## [6] "B25035_001"
msaecon<-data.frame(gini=acsecon@estimate[, "B19083_001"],
ppoverty=acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"],
giniz=scale(acsecon@estimate[, "B19083_001"]),
pvacant=acsecon@estimate[,"B25002_003"]/acsecon@estimate[, "B25002_001"],
ppovertyz=scale(acsecon@estimate[, "B17001_002"]/acsecon@estimate[, "B17001_001"]),
pvacantz=scale(acsecon@estimate[,"B25002_003"]/acsecon@estimate[, "B25002_001"]),
medhouse=acsecon@estimate[, "B25035_001" ],
medhousez=scale(acsecon@estimate[, "B25035_001" ]))
msaecon$ids<-paste(acsecon@geography$metropolitanstatisticalareamicropolitanstatisticalarea)
library(tigris)
## To enable
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
##
## Attaching package: 'tigris'
## The following object is masked from 'package:graphics':
##
## plot
msa<-core_based_statistical_areas(cb=T)
msa_ec<-geo_join(msa, msaecon, "CBSAFP", "ids", how="inner")
msa_ec$state<-str_sub(msa_ec$NAME, -2,-1)
msaecon<-merge(msaecon, msa_ec@data[, c(1:7,18)], by.x="ids", by.y="CBSAFP", all.x=T)
joindata<-merge(brfss16m, msaecon, by.x="mmsa",by.y="ids", all.x=T)
joindata$bmiz<-as.numeric(scale(joindata$bmi, center=T, scale=T))
#joindata<-joindata[complete.cases(joindata[, c("bmiz", "race_eth", "agec", "educ", "gini")]),]
#and merge the data back to the kids data
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:acs':
##
## combine
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
joindata<-joindata%>%
select(bmiz, obese, hltcov, mmsa, agec, educ, race_eth,smoke, healthmdays, badhealth,bmi,gini, ppoverty, ppovertyz,giniz,medhouse,medhousez, pvacant, pvacantz, male, mmsawt, mmsaname, state )%>%
filter(complete.cases(.))
head(joindata[, c("hltcov", "agec", "educ", "gini", "ppoverty" , "mmsa", "state")])
## hltcov agec educ gini ppoverty mmsa state
## 1 1 (79,99] 3somecol 0.427 0.1053253 10580 NY
## 2 1 (59,79] 2hsgrad 0.427 0.1053253 10580 NY
## 3 1 (79,99] 4colgrad 0.427 0.1053253 10580 NY
## 4 1 (59,79] 2hsgrad 0.427 0.1053253 10580 NY
## 5 1 (79,99] 3somecol 0.427 0.1053253 10580 NY
## 6 1 (39,59] 1somehs 0.427 0.1053253 10580 NY
meanbmi<-mean(joindata$bmi, na.rm=T)
sdbmi<-sd(joindata$bmi, na.rm=T)
Individuals who are racial/ethnic minorities, and who live in areas with high poverty rates are at higher risk of not having health coverage.
#MSA level variables
fit.mix1<-lmer(hltcov~educ+race_eth+ ppovertyz+giniz+(1|mmsa), data=joindata)
arm::display(fit.mix1, detail=T)
## lmer(formula = hltcov ~ educ + race_eth + ppovertyz + giniz +
## (1 | mmsa), data = joindata)
## coef.est coef.se t value
## (Intercept) 0.93 0.00 252.23
## educ0Prim -0.10 0.00 -21.87
## educ1somehs -0.06 0.00 -19.81
## educ3somecol 0.02 0.00 15.77
## educ4colgrad 0.06 0.00 38.52
## race_ethhispanic -0.15 0.00 -59.49
## race_ethnh black -0.04 0.00 -18.61
## race_ethnh multirace -0.04 0.00 -8.04
## race_ethnh other -0.04 0.00 -12.00
## ppovertyz 0.02 0.01 3.78
## giniz -0.01 0.00 -1.74
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.02
## Residual 0.24
## ---
## number of obs: 171032, groups: mmsa, 123
## AIC = -3241.5, DIC = -3486.3
## deviance = -3376.9
It shows that, individuals with different race/ethnic background than non-Hispanic whites, and who are poor have a higher risk of not having health coverage.
This involves forming the interaction term between an individual level variable and a higher level variable. In this case, we will interact race/ethnicity with poverty at the MSAs. We can state this kind of hypothesis as “Ethinic minority groups, living in poverty in different MSAs will have no health coverage”.
fit.mix2<-lmer(hltcov~educ+race_eth+ppovertyz+giniz+ppoverty*race_eth+(1|mmsa), joindata, REML=F)
## fixed-effect model matrix is rank deficient so dropping 1 column / coefficient
arm::display(fit.mix2, detail=T)
## lmer(formula = hltcov ~ educ + race_eth + ppovertyz + giniz +
## ppoverty * race_eth + (1 | mmsa), data = joindata, REML = F)
## coef.est coef.se t value
## (Intercept) 0.93 0.00 256.94
## educ0Prim -0.10 0.00 -21.86
## educ1somehs -0.06 0.00 -19.78
## educ3somecol 0.02 0.00 15.77
## educ4colgrad 0.06 0.00 38.50
## race_ethhispanic -0.19 0.01 -22.10
## race_ethnh black -0.04 0.01 -3.73
## race_ethnh multirace -0.01 0.02 -0.35
## race_ethnh other -0.06 0.02 -3.67
## ppovertyz 0.01 0.01 1.38
## giniz -0.01 0.00 -1.28
## race_ethhispanic:ppoverty 0.32 0.06 5.09
## race_ethnh black:ppoverty -0.02 0.07 -0.29
## race_ethnh multirace:ppoverty -0.23 0.19 -1.19
## race_ethnh other:ppoverty 0.13 0.12 1.10
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.02
## Residual 0.24
## ---
## number of obs: 171032, groups: mmsa, 123
## AIC = -3369, DIC = -3403
## deviance = -3403.0
We can see that blacks in MSAs living in poverty, will have lower chances of having health care coverage.
anova(fit.mix1, fit.mix2)
## refitting model(s) with ML (instead of REML)
## Data: joindata
## Models:
## fit.mix1: hltcov ~ educ + race_eth + ppovertyz + giniz + (1 | mmsa)
## fit.mix2: hltcov ~ educ + race_eth + ppovertyz + giniz + ppoverty * race_eth +
## fit.mix2: (1 | mmsa)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.mix1 13 -3350.9 -3220.2 1688.4 -3376.9
## fit.mix2 17 -3369.0 -3198.2 1701.5 -3403.0 26.178 4 2.913e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(glm(hltcov~mmsa, joindata, family=binomial), test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: hltcov
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 171031 82397
## mmsa 1 26.759 171030 82370 2.305e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The global likelihood ratio informs us that there is variance between my grouping variable levels, which means we can go ahead and do the generalized linear mixed models.
joindata$mmsa_fac<-as.factor(joindata$mmsa)
fit.mix.bin<-glmer(hltcov~educ+race_eth+(1|mmsa_fac), family=binomial, joindata,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))
fit.mix.bin<-refit(fit.mix.bin)
arm::display(fit.mix.bin, detail=T)
## glmer(formula = hltcov ~ educ + race_eth + (1 | mmsa_fac), data = joindata,
## family = binomial, control = glmerControl(optimizer = "bobyqa",
## optCtrl = list(maxfun = 2e+05)))
## coef.est coef.se z value Pr(>|z|)
## (Intercept) 2.60 0.04 68.08 0.00
## educ0Prim -0.62 0.05 -13.18 0.00
## educ1somehs -0.50 0.04 -13.84 0.00
## educ3somecol 0.36 0.03 14.11 0.00
## educ4colgrad 1.18 0.03 40.50 0.00
## race_ethhispanic -1.46 0.03 -48.85 0.00
## race_ethnh black -0.63 0.03 -19.15 0.00
## race_ethnh multirace -0.62 0.07 -8.89 0.00
## race_ethnh other -0.70 0.05 -13.49 0.00
##
## Error terms:
## Groups Name Std.Dev.
## mmsa_fac (Intercept) 0.36
## Residual 1.00
## ---
## number of obs: 171032, groups: mmsa_fac, 123
## AIC = 74523.6, DIC = 73743
## deviance = 74123.3
#odds ratios
exp(fixef(fit.mix.bin)[-1])
## educ0Prim educ1somehs educ3somecol
## 0.5353796 0.6057856 1.4270454
## educ4colgrad race_ethhispanic race_ethnh black
## 3.2472614 0.2311821 0.5343628
## race_ethnh multirace race_ethnh other
## 0.5377257 0.4965547
exp(confint(fit.mix.bin, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## (Intercept) 12.4369660 14.4414276
## educ0Prim 0.4878811 0.5875024
## educ1somehs 0.5642692 0.6503567
## educ3somecol 1.3582678 1.4993057
## educ4colgrad 3.0673412 3.4377352
## race_ethhispanic 0.2179901 0.2451724
## race_ethnh black 0.5011674 0.5697568
## race_ethnh multirace 0.4690040 0.6165168
## race_ethnh other 0.4485194 0.5497345
The odds ratios show that, people with college degree are more than 3 times more likely to have health coverage, whereas, individuals with primary education are 40% less likely to have coverage. Individuals who are not non-Hispanic whites, are around 50% less likely to have health coverage.
fit.mix.bin2<-glmer(hltcov~educ+race_eth+ppoverty+giniz+(1|mmsa_fac), family=binomial, joindata,
control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0122992 (tol =
## 0.001, component 1)
fit.mix.bin2<-refit(fit.mix.bin2)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.00433569 (tol =
## 0.001, component 1)
arm::display(fit.mix.bin2, detail=T)
## glmer(formula = hltcov ~ educ + race_eth + ppoverty + giniz +
## (1 | mmsa_fac), data = joindata, family = binomial, control = glmerControl(optimizer = "bobyqa",
## optCtrl = list(maxfun = 2e+05)))
## coef.est coef.se z value Pr(>|z|)
## (Intercept) 2.24 0.07 31.15 0.00
## educ0Prim -0.63 0.05 -13.17 0.00
## educ1somehs -0.50 0.04 -13.87 0.00
## educ3somecol 0.36 0.03 14.13 0.00
## educ4colgrad 1.18 0.03 40.53 0.00
## race_ethhispanic -1.47 0.03 -48.92 0.00
## race_ethnh black -0.63 0.03 -19.11 0.00
## race_ethnh multirace -0.62 0.07 -8.88 0.00
## race_ethnh other -0.70 0.05 -13.47 0.00
## ppoverty 2.72 0.48 5.72 0.00
## giniz -0.08 0.05 -1.66 0.10
##
## Error terms:
## Groups Name Std.Dev.
## mmsa_fac (Intercept) 0.35
## Residual 1.00
## ---
## number of obs: 171032, groups: mmsa_fac, 123
## AIC = 74522.2, DIC = 73750.6
## deviance = 74124.4
#odds ratios
exp(fixef(fit.mix.bin2)[-1])
## educ0Prim educ1somehs educ3somecol
## 0.5349975 0.6056013 1.4270925
## educ4colgrad race_ethhispanic race_ethnh black
## 3.2486495 0.2305806 0.5344457
## race_ethnh multirace race_ethnh other ppoverty
## 0.5377803 0.4969015 15.1652886
## giniz
## 0.9195916
exp(confint(fit.mix.bin2, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## (Intercept) 8.1529059 10.8061783
## educ0Prim 0.4874317 0.5872051
## educ1somehs 0.5641561 0.6500912
## educ3somecol 1.3583990 1.4992597
## educ4colgrad 3.0687332 3.4391141
## race_ethhispanic 0.2174185 0.2445394
## race_ethnh black 0.5011858 0.5699127
## race_ethnh multirace 0.4689449 0.6167199
## race_ethnh other 0.4488165 0.5501382
## ppoverty 5.9729373 38.5046696
## giniz 0.8328148 1.0154102
Including the group level predictors, does not seem to change the outcome that much.
anova(fit.mix.bin, fit.mix.bin2)
## Data: joindata
## Models:
## fit.mix.bin: hltcov ~ educ + race_eth + (1 | mmsa_fac)
## fit.mix.bin2: hltcov ~ educ + race_eth + ppoverty + giniz + (1 | mmsa_fac)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.mix.bin 10 74524 74624 -37252 74504
## fit.mix.bin2 12 74522 74643 -37249 74498 5.3795 2 0.0679 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The likelihood ratio test shows that, the difference is not significant. Which means, the difference in the outcome of health coverage by rac/ethnicity, education, poverty across MSAs is not significant, and we can’t conclude whether, health coverage will vary by MSAs.