load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
#Data Prep
nams<-names(brfss_17)
head(nams, n=10)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth" "physhlth"
## [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(brfss_17)<-newnames
# ##Get TX Cities
# brfss_17$tx<-NA
# brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1
# ##Remove Non-Responses
# brfss_17<-brfss_17%>%
# filter(tx==1, is.na(mmsawt)==F, is.na(hlthpln1)==F)
##Recodes
brfss_17$hlthpln1<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
brfss_17$educ<-Recode(brfss_17$educa, recodes="1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0", as.factor=T)
brfss_17$marst<-Recode(brfss_17$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst, ref='married')
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
brfss_17$employ<-Recode(brfss_17$employ1, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')
brfss_17$agec<-cut(brfss_17$age80, breaks=c(0,24,39,59,79,99))
brfss_17$smoke<-Recode(brfss_17$smoker3, recodes="1:2='Current'; 3='Former';4='NeverSmoked'; else=NA", as.factor=T)
brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")
brfss_17$sex<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))
brfss_17$menthlth<-recode(brfss_17$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss_17$employa <- Recode(brfss_17$employ1, recodes="1:2=1; 2:6=0; else=NA", as.factor=T)
brfss_17$bmi<-ifelse(is.na(brfss_17$bmi5)==T, NA, brfss_17$bmi5/100)
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
brfss_17$race_eth<-Recode(brfss_17$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")
#Healthy days
brfss_17$healthdys<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")
#obese
brfss_17$obese<-ifelse(is.na(brfss_17$bmi)==T, NA,
ifelse(brfss_17$bmi>30,1,0))
#sex
brfss_17$male<-ifelse(brfss_17$sex==1, 1, 0)
#BMI
brfss_17$bmi<-ifelse(is.na(brfss_17$bmi5)==T, NA, brfss_17$bmi5/100)
#Healthy days
brfss_17$healthdays<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")
#Healthy mental health days
brfss_17$healthmdays<-Recode(brfss_17$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")
names(brfss_17)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth"
## [6] "physhlth" "menthlth" "poorhlth" "hlthpln1" "persdoc2"
## [11] "medcost" "checkup1" "bphigh4" "bpmeds" "cholchk1"
## [16] "toldhi2" "cholmed1" "cvdinfr4" "cvdcrhd4" "cvdstrk3"
## [21] "asthma3" "asthnow" "chcscncr" "chcocncr" "chccopd1"
## [26] "havarth3" "addepev2" "chckidny" "diabete3" "diabage2"
## [31] "lmtjoin3" "arthdis2" "arthsocl" "joinpai1" "sex"
## [36] "marital" "educa" "renthom1" "numhhol2" "numphon2"
## [41] "cpdemo1a" "veteran3" "employ1" "children" "income2"
## [46] "internet" "weight2" "height3" "pregnant" "deaf"
## [51] "blind" "decide" "diffwalk" "diffdres" "diffalon"
## [56] "smoke100" "smokday2" "stopsmk2" "lastsmk2" "usenow3"
## [61] "ecigaret" "ecignow" "alcday5" "avedrnk2" "drnk3ge5"
## [66] "maxdrnks" "fruit2" "fruitju2" "fvgreen1" "frenchf1"
## [71] "potatoe1" "vegetab2" "exerany2" "exract11" "exeroft1"
## [76] "exerhmm1" "exract21" "exeroft2" "exerhmm2" "strength"
## [81] "seatbelt" "flushot6" "flshtmy2" "pneuvac3" "shingle2"
## [86] "hivtst6" "hivtstd3" "hivrisk5" "casthdx2" "casthno2"
## [91] "callbckz" "wdusenow" "wdinftrk" "wdhowoft" "wdshare"
## [96] "namtribe" "namothr" "urbnrrl" "ststr" "impsex"
## [101] "rfhlth" "phys14d" "ment14d" "hcvu651" "rfhype5"
## [106] "cholch1" "rfchol1" "michd" "ltasth1" "casthm1"
## [111] "asthms1" "drdxar1" "lmtact1" "lmtwrk1" "lmtscl1"
## [116] "prace1" "mrace1" "hispanc" "race" "raceg21"
## [121] "racegr3" "ageg5yr" "age65yr" "age80" "ageg"
## [126] "wtkg3" "bmi5" "bmi5cat" "rfbmi5" "educag"
## [131] "incomg" "smoker3" "rfsmok3" "ecigsts" "curecig"
## [136] "drnkany5" "rfbing5" "drnkwek" "rfdrhv5" "ftjuda2"
## [141] "frutda2" "grenda1" "frnchda" "potada1" "vegeda2"
## [146] "misfrt1" "misveg1" "frtres1" "vegres1" "frutsu1"
## [151] "vegesu1" "frtlt1a" "veglt1a" "frt16a" "veg23a"
## [156] "fruite1" "vegete1" "totinda" "minac11" "minac21"
## [161] "pacat1" "paindx1" "pa150r2" "pa300r2" "pa30021"
## [166] "pastrng" "parec1" "pastae1" "rfseat2" "rfseat3"
## [171] "flshot6" "pneumo2" "aidtst3" "mmsa" "mmsawt"
## [176] "seqno" "mmsaname" "educ" "ins" "marst"
## [181] "badhealth" "employ" "agec" "smoke" "employa"
## [186] "bmi" "black" "white" "other" "hispanic"
## [191] "race_eth" "healthdys" "obese" "male" "healthdays"
## [196] "healthmdays"
View(brfss_17)
#1 Conduct a multi-level analysis where you: Estimate the random intercept model with just the higher level units as your predictor (NULL model) report the variance components and ICC.
library(lme4)
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 4.0.5
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(arm)
## Warning: package 'arm' was built under R version 4.0.3
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## arm (Version 1.11-2, built: 2020-7-27)
## Working directory is C:/Users/codar/OneDrive/Documents/Stats II/Data
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
install.packages('lmerTest')
## Warning: package 'lmerTest' is in use and will not be installed
merged<-brfss_17%>%
dplyr::select( obese, mmsa, sex, agec, educ, race_eth,smoke, badhealth,bmi, mmsawt, mmsaname, healthdays )%>%
filter(complete.cases(.))
fit.an<-lm(healthdays~as.factor(mmsa),
merged)
anova(fit.an)
## Analysis of Variance Table
##
## Response: healthdays
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(mmsa) 135 84087 622.87 8.5056 < 2.2e-16 ***
## Residuals 198358 14525898 73.23
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Using just the higher level model (mmsa), my F value is higher than a 1 signaling that there is a large variance. My P-value is significantly small (<2.2e-16) as well.
library(lme4)
library(lmerTest)
library(arm)
fit<-lmer(healthdays~(1|mmsa),
data=merged)
arm::display(fit,
detail=T)
## lmer(formula = healthdays ~ (1 | mmsa), data = merged)
## coef.est coef.se t value
## (Intercept) 4.30 0.06 68.61
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.68
## Residual 8.56
## ---
## number of obs: 198494, groups: mmsa, 136
## AIC = 1.41585e+06, DIC = 1415839
## deviance = 1415842.3
#manual calculation
(.68^2)/(8.56^2+.68^2)
## [1] 0.006271021
performance::icc(fit)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.006
## Conditional ICC: 0.006
#Since the ICC can interpret the expected correlation between two randomly drawn units that are in the same group, the ICC for fit tells us that values from the same group are not similar. To use an example, using mmsa, there are low chances of selecting identical individuals.
#2 Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual - level predictors. Report the variance components of the model and describe the results of the model. Report the results for the fixed effects,
fit2<-lmer(healthdays~sex+agec+educ+(1|mmsa),
data=merged,
na.action=na.omit)
arm::display(fit2, detail=T)
## lmer(formula = healthdays ~ sex + agec + educ + (1 | mmsa), data = merged,
## na.action = na.omit)
## coef.est coef.se t value
## (Intercept) 5.20 0.12 43.59
## sexMale -0.68 0.04 -17.88
## agec(24,39] 1.00 0.09 11.21
## agec(39,59] 2.56 0.08 30.69
## agec(59,79] 3.10 0.08 37.58
## agec(79,99] 3.31 0.10 32.33
## educ2hsgrad -2.11 0.09 -23.78
## educ3somecol -2.56 0.09 -29.16
## educ4colgrad -4.28 0.09 -50.33
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.49
## Residual 8.41
## ---
## number of obs: 198494, groups: mmsa, 136
## AIC = 1.40905e+06, DIC = 1408955
## deviance = 1408990.7
performance::icc(fit2)
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.003
## Conditional ICC: 0.003
#Fit2 results show that males are less likely to have healthy days than women and each age category is more likely than the lower age category to report less healthy days. The question asked is: Now thinking about your physical health, which includes physical illness and injury, for how many days during the past 30 days was your physical health not good?. The (maybe ironic) result for age is that with higher educational attainment there are more likely to report more unhealthy days. Residual of 8.41 and the icc is now lower than it was for the original fit (.003 compared to .006), which is good because it implies increased variance.
#3 Compare the models from 1 and 2 using a likelihood ratio test (anova(fit1, fit2, test=”Chisq”)) . Does the inclusion of the individual level variables increase the overall model fit?
anova(fit, fit2, test="Chisq")
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit: healthdays ~ (1 | mmsa)
## fit2: healthdays ~ sex + agec + educ + (1 | mmsa)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## fit 3 1415848 1415879 -707921 1415842
## fit2 11 1409013 1409125 -704495 1408991 6851.6 8 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Fit2 has a slightly lower AIC than fit. Chi square for fit2 is statistically significant with a p-value of <2.2e-16. THe inclusion of the individual level does increase the overall model fit.
rand(fit2) #shows significant variation in healthy days across cities
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## healthdays ~ sex + agec + educ + (1 | mmsa)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 11 -704513 1409049
## (1 | mmsa) 10 -704664 1409349 302.2 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#using the LRT, we see that
library(sjstats); library(sjPlot)
##
## Attaching package: 'sjstats'
## The following object is masked from 'package:survey':
##
## cv
## The following object is masked from 'package:questionr':
##
## prop
## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!
icc(fit2)
## Warning: 'icc' is deprecated.
## Use 'performance::icc()' instead.
## See help("Deprecated")
## # Intraclass Correlation Coefficient
##
## Adjusted ICC: 0.003
## Conditional ICC: 0.003
plot_model(fit,
type = "re",
sort.est="sort.all",
grid=F)
## Warning: `select_vars()` is deprecated as of dplyr 0.8.4.
## Please use `tidyselect::vars_select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.