#load brfss
library(car)
library(stargazer)
library(survey)
library(sjPlot)
library(ggplot2)
library(pander)
library(knitr)
library(tidycensus)
library(dplyr)
library(haven)
tb17<-load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
nams<-names(brfss_17)
newnames<-gsub(pattern = "_",
replacement = "",
x = nams)
names(brfss_17)<-tolower(newnames)
##Recode Variables
#sex
brfss_17$male<-ifelse(brfss_17$sex==1, 1, 0)
#BMI
brfss_17$bmi<-ifelse(is.na(brfss_17$bmi5)==T, NA, brfss_17$bmi5/100)
#Healthy days
brfss_17$healthdays<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")
#Healthy mental health days
brfss_17$mental<-Recode(brfss_17$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
brfss_17$race_eth<-Recode(brfss_17$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")
#insurance
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#income grouping
brfss_17$inc<-Recode(brfss_17$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor = T)
brfss_17$inc<-as.ordered(brfss_17$inc)
#education level
brfss_17$educ<-Recode(brfss_17$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')
#employment
brfss_17$employ<-Recode(brfss_17$employ1,
recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='employloyed')
#marital status
brfss_17$marst<-Recode(brfss_17$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst,
ref='married')
#Age cut into intervals
brfss_17$agec<-cut(brfss_17$age80,
breaks=c(0,24,39,59,79,99))
#BMI, in the brfss_17a the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's
brfss_17$bmi<-brfss_17$bmi5/100
#smoking currently
brfss_17$smoke<-Recode(brfss_17$smoker3,
recodes="1:2=1; 3:4=0; else=NA")
#brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")
brfss_17$obese<-ifelse(is.na(brfss_17$bmi)==T, NA,
ifelse(brfss_17$bmi>30,1,0))
1.) Estimate the random intercept model with just the higher level units as your predictor (NULL model)report the variance components and ICC
merged<-brfss_17%>%
dplyr::select(employ, ins, mmsa, educ, hispanic, mental, male, smoke, mmsawt)%>%
filter(complete.cases(.))
fit.an<-lm(mental~as.factor(mmsa),
merged)
anova(fit.an)
## Analysis of Variance Table
##
## Response: mental
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(mmsa) 135 46318 343.10 5.701 < 2.2e-16 ***
## Residuals 210917 12693327 60.18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#The F-ratio is bigger than 1 at 5.701, the p-value is small at 2.2e-16. There is significant variation between mental health and average mental health between cities. Because there is significant variation with these variables, we move forward.
library(lme4)
library(lmerTest)
library(arm)
fit<-lmer(mental~(1|mmsa),
data=merged)
arm::display(fit, detail=T)
## lmer(formula = mental ~ (1 | mmsa), data = merged)
## coef.est coef.se t value
## (Intercept) 3.63 0.05 80.46
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.47
## Residual 7.76
## ---
## number of obs: 211053, groups: mmsa, 136
## AIC = 1.46395e+06, DIC = 1463930
## deviance = 1463934.7
#to calculate ICC
(.47^2)/(7.76^2+.47^2)
## [1] 0.003654955
#ICC is 0.003 indicating that most likely no two people randomly selected will be identical to each other when looking at average mental health days.
2.) Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual - level predictors
1.Report the variance components of the model and describe the results of the model
2.Report the results for the fixed effects
fit2<-lmer(mental~employ+ins+mmsa+educ+hispanic+male+smoke+(1|mmsa),
data=merged)
arm::display(fit2, detail=T)
## lmer(formula = mental ~ employ + ins + mmsa + educ + hispanic +
## male + smoke + (1 | mmsa), data = merged)
## coef.est coef.se t value
## (Intercept) 4.19 0.12 36.36
## employnilf 1.37 0.05 26.52
## employretired -0.67 0.04 -17.40
## employunable 7.07 0.07 99.72
## ins -0.96 0.06 -14.85
## mmsa 0.00 0.00 0.93
## educ0Prim -0.40 0.12 -3.33
## educ1somehs 0.34 0.09 3.82
## educ3somecol 0.19 0.05 4.17
## educ4colgrad -0.49 0.04 -11.41
## hispanic -0.15 0.06 -2.32
## male -0.96 0.03 -29.18
## smoke 2.63 0.05 53.55
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.30
## Residual 7.41
## ---
## number of obs: 211053, groups: mmsa, 136
## AIC = 1.44453e+06, DIC = 1444353
## deviance = 1444425.8
#to calculate ICC
(.30^2)/(7.41^2+.30^2)
## [1] 0.00163642
#The numbers for fit2 are now at .30 and 7.41, this indicates differences between cities while explaining individual health mental days. The ICC for this model is now .001 lower than fit1. Based on the new numbers two randomly selected people from various cities have a lower change of being identical for their health mental days. Based on the numbers, there are differences in the employed status. Retired individuals report less healthy mental days with -0.67. An employed person is more like to report healthy mental days at 7.07. Oddly, for smokers report 2.63 healthy mental days.
Compare the models from 1 and 2 using a likelihood ratio test (anova(fit1, fit2, test=”Chisq”)) .
anova(fit, fit2)
## Data: merged
## Models:
## fit: mental ~ (1 | mmsa)
## fit2: mental ~ employ + ins + mmsa + educ + hispanic + male + smoke +
## fit2: (1 | mmsa)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## fit 3 1463941 1463972 -731967 1463935
## fit2 15 1444456 1444610 -722213 1444426 19509 12 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Does the inclusion of the individual level variables increase the overall model fit?
rand(fit2)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## mental ~ employ + ins + mmsa + educ + hispanic + male + smoke + (1 | mmsa)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 15 -722249 1444528
## (1 | mmsa) 14 -722316 1444660 133.12 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Based on the chi-square numbers fit2 model fits better. The chi-square coefficient is at 19509 indicating there is a minute chant . The AIC is also lowest on fit2, the lower the AIC the better. with the pvalue of < 2.2e-16 indicating statistical significance.