My outcome of interest is a person’s chronic disease condition, measured by asking, ‘Has a doctor, nurse, or other health professional ever told you that you had a depressive disorder (including depression, major depression, dysthymia, or minor depression)? For this homework, I am using 2017 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART county data and merging it to the 2011 American Community Survey 5-year estimates at the state level to get the higher level predictors.

Loading BRFSS Data

library(car)
library(stargazer)
library(survey)
library(sjPlot)
library(ggplot2)
library(pander)
library(knitr)
library(tidycensus)
library(dplyr)
library(lme4)
library(arm)
library(haven)
library(censusapi)
library(tidyverse)
library(sf)
library(lmerTest)

##Recoding Variables

load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))

# Cleaning the  variable names for space, underscore & Uppercase Characters
nams<-names(brfss_17)
newnames<-gsub("_", "", nams)
names(brfss_17)<-make.names(tolower(newnames), unique = T)

Recode variables

#Healthy days
brfss_17$healthdays<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")

# sex
brfss_17$sex<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))

brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")

#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")

brfss_17$race_eth<-Recode(brfss_17$racegr3, 
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")

#insurance
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")

#income grouping
brfss_17$inc<-ifelse(brfss_17$incomg==9, NA, brfss_17$incomg)

#education level
brfss_17$educ<-Recode(brfss_17$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')

#employment
brfss_17$employ<-Recode(brfss_17$employ1,
recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')

#marital status
brfss_17$marst<-Recode(brfss_17$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst, ref='married')

#Age cut into intervals
brfss_17$agec<-cut(brfss_17$age80, breaks=c(0,24,39,59,79,99))

#BMI, in the brfss_17a the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's

brfss_17$bmi<-brfss_17$bmi5/100

#smoking currently
brfss_17$smoke<-Recode(brfss_17$smoker3, 
recodes="1:2=1; 3:4=0; else=NA")


brfss_17$skcancer<-Recode(brfss_17$chcscncr, recodes = "1= 1; 2= 0; else=NA")
brfss_17$kidney<-Recode(brfss_17$chckidny, recodes = "1= 1; 2= 0; else=NA")
brfss_17$chrobpuld<-Recode(brfss_17$chccopd1, recodes = "1= 1; 2= 0; else=NA")
brfss_17$arthritis<-Recode(brfss_17$havarth3, recodes = "1= 1; 2= 0; else=NA")
brfss_17$depdisorder<-Recode(brfss_17$addepev2, recodes = "1= 1; 2:4=0; else=NA")

1 Conduct a multi-level analysis where you: Estimate the random intercept model with just the higher level units as your predictor (NULL model) report the variance components and ICC.

merged<-brfss_17%>%
  dplyr::select( depdisorder, mmsa, sex, agec, badhealth, marst, educ, race_eth, employ, badhealth,bmi, mmsaname, mmsawt )%>%
  filter(complete.cases(.))

fit.an<-lm(depdisorder~as.factor(mmsa),
           merged)
anova(fit.an)
## Analysis of Variance Table
## 
## Response: depdisorder
##                     Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(mmsa)    135    170 1.26154  7.9205 < 2.2e-16 ***
## Residuals       203961  32486 0.15928                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Higher level Predictor

fit<-lmer(depdisorder~(1|mmsa),
           data=merged)
arm::display(fit,
             detail=T)
## lmer(formula = depdisorder ~ (1 | mmsa), data = merged)
##             coef.est coef.se t value
## (Intercept)  0.20     0.00   80.31  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.03    
##  Residual             0.40    
## ---
## number of obs: 204097, groups: mmsa, 136
## AIC = 204518, DIC = 204491.8
## deviance = 204502.0
performance::icc(fit)
## # Intraclass Correlation Coefficient
## 
##      Adjusted ICC: 0.005
##   Conditional ICC: 0.005

The interclass correlation Coefficient (ICC) is a measure of the proportion of variance that is between people versus the total variance (i.e., variance between people and variance within persons). The result indicates a significance since the t-value is more than 2 meaning on average people that are diagnosed of depressive disoder a chronic health condition are significantly different. the variation however, at the metropolitan municipal and statistical area level is 0.03 and the individual level at 0.40. The ICC value is 0.005 which means individuals at the mmsa are very different considering their predisposition depressive disorder.

2 Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual - level predictors. Report the variance components of the model and describe the results of the model. Report the results for the fixed effects,

Individual level Predictor

fit2<-lmer(depdisorder~sex+agec+educ+(1|mmsa),
           data=merged,
           na.action=na.omit)
arm::display(fit2, detail=T)
## lmer(formula = depdisorder ~ sex + agec + educ + (1 | mmsa), 
##     data = merged, na.action = na.omit)
##              coef.est coef.se t value
## (Intercept)    0.27     0.00   59.19 
## sexMale       -0.10     0.00  -55.92 
## agec(24,39]    0.02     0.00    3.77 
## agec(39,59]    0.02     0.00    4.63 
## agec(59,79]   -0.02     0.00   -4.16 
## agec(79,99]   -0.12     0.00  -24.67 
## educ0Prim      0.02     0.01    3.66 
## educ1somehs    0.07     0.00   14.17 
## educ3somecol   0.01     0.00    4.81 
## educ4colgrad  -0.04     0.00  -17.81 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.03    
##  Residual             0.39    
## ---
## number of obs: 204097, groups: mmsa, 136
## AIC = 199222, DIC = 199000.1
## deviance = 199099.1
performance::icc(fit2)
## # Intraclass Correlation Coefficient
## 
##      Adjusted ICC: 0.004
##   Conditional ICC: 0.004

The results show that compared to females, males are less likely to be diagnosed of depressive disorder,persons between ages 59 - 79 and 79 - 99 are less likely than the lower age categories to be diagnosed of depressive disorder. A persons likelihood of been diagnosed of depressive disorder increases with educational attainment ment.That is, a person is more likely to be diagnosed with depressive disorder as he/she progress on the academic ladder. Conversely, persons with college degree are less likely to be diagnosed of depressive disorder. The t-value is greater than 2 and the fixed effect term is 0.27, therefore, we could say that it is significantly different holding all esle constant. the variation at the mmsa level is 0.03 compared 0.39 for individuals. The ICC is 0.004 meaning individuals at the mmsa are different when it comes to thier propensity to be diagnosed of depressive disorder on average holding everything constant.

#3 Compare the models from 1 and 2 using a likelihood ratio test (anova(fit1, fit2, test=”Chisq”)) . Does the inclusion of the individual level variables increase the overall model fit?

wadt <- read_csv("C:/Users/chris/University of Texas at San Antonio/SPRING 2021/DEM 7283 STATISTICS FOR DEMOGRAPHIC DATA 2/states.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   .default = col_character()
## )
## i Use `spec()` for the full column specifications.

Higher level (Macro) variables

anova(fit, fit2, test="Chisq")
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit: depdisorder ~ (1 | mmsa)
## fit2: depdisorder ~ sex + agec + educ + (1 | mmsa)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## fit     3 204508 204539 -102251   204502                         
## fit2   12 199123 199246  -99550   199099 5402.9  9  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Yes, Fit2 has a slightly lower AIC than fit. Chi square for fit2 is statistically significant with a p-value of <2.2e-16. THe inclusion of the individual level increase the overall model fit.