HW 10

load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))

#Data Prep
nams<-names(brfss_17)
head(nams, n=10)

##  [1] "dispcode" "statere1" "safetime" "hhadult"  "genhlth"  "physhlth"
##  [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"

newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(brfss_17)<-newnames

# ##Get TX Cities 
# brfss_17$tx<-NA
# brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1

# ##Remove Non-Responses 
# brfss_17<-brfss_17%>%
#   filter(tx==1, is.na(mmsawt)==F, is.na(hlthpln1)==F)

##Recodes
brfss_17$hlthpln1<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")

brfss_17$educ<-Recode(brfss_17$educa, recodes="1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)

brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0", as.factor=T)

brfss_17$marst<-Recode(brfss_17$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst, ref='married')

brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")

brfss_17$employ<-Recode(brfss_17$employ1, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')

brfss_17$agec<-cut(brfss_17$age80, breaks=c(0,24,39,59,79,99))
                   
brfss_17$smoke<-Recode(brfss_17$smoker3, recodes="1:2='Current'; 3='Former';4='NeverSmoked'; else=NA", as.factor=T)
brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")

brfss_17$sex<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))

brfss_17$menthlth<-recode(brfss_17$menthlth, recodes = "88=0; 77=NA; 99=NA")

brfss_17$employa <- Recode(brfss_17$employ1, recodes="1:2=1; 2:6=0; else=NA", as.factor=T)

brfss_17$bmi<-ifelse(is.na(brfss_17$bmi5)==T, NA, brfss_17$bmi5/100)

#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")

brfss_17$race_eth<-Recode(brfss_17$racegr3, 
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")

#Healthy days
brfss_17$healthdys<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")
#obese
brfss_17$obese<-ifelse(is.na(brfss_17$bmi)==T, NA, 
                       ifelse(brfss_17$bmi>30,1,0))
#sex
brfss_17$male<-ifelse(brfss_17$sex==1, 1, 0)

#BMI
brfss_17$bmi<-ifelse(is.na(brfss_17$bmi5)==T, NA, brfss_17$bmi5/100)

#Healthy days
brfss_17$healthdays<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")

#Healthy mental health days
brfss_17$healthmdays<-Recode(brfss_17$menthlth, recodes = "88=0; 77=NA; 99=NA")

brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")

#brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")



names(brfss_17)

##   [1] "dispcode"    "statere1"    "safetime"    "hhadult"     "genhlth"    
##   [6] "physhlth"    "menthlth"    "poorhlth"    "hlthpln1"    "persdoc2"   
##  [11] "medcost"     "checkup1"    "bphigh4"     "bpmeds"      "cholchk1"   
##  [16] "toldhi2"     "cholmed1"    "cvdinfr4"    "cvdcrhd4"    "cvdstrk3"   
##  [21] "asthma3"     "asthnow"     "chcscncr"    "chcocncr"    "chccopd1"   
##  [26] "havarth3"    "addepev2"    "chckidny"    "diabete3"    "diabage2"   
##  [31] "lmtjoin3"    "arthdis2"    "arthsocl"    "joinpai1"    "sex"        
##  [36] "marital"     "educa"       "renthom1"    "numhhol2"    "numphon2"   
##  [41] "cpdemo1a"    "veteran3"    "employ1"     "children"    "income2"    
##  [46] "internet"    "weight2"     "height3"     "pregnant"    "deaf"       
##  [51] "blind"       "decide"      "diffwalk"    "diffdres"    "diffalon"   
##  [56] "smoke100"    "smokday2"    "stopsmk2"    "lastsmk2"    "usenow3"    
##  [61] "ecigaret"    "ecignow"     "alcday5"     "avedrnk2"    "drnk3ge5"   
##  [66] "maxdrnks"    "fruit2"      "fruitju2"    "fvgreen1"    "frenchf1"   
##  [71] "potatoe1"    "vegetab2"    "exerany2"    "exract11"    "exeroft1"   
##  [76] "exerhmm1"    "exract21"    "exeroft2"    "exerhmm2"    "strength"   
##  [81] "seatbelt"    "flushot6"    "flshtmy2"    "pneuvac3"    "shingle2"   
##  [86] "hivtst6"     "hivtstd3"    "hivrisk5"    "casthdx2"    "casthno2"   
##  [91] "callbckz"    "wdusenow"    "wdinftrk"    "wdhowoft"    "wdshare"    
##  [96] "namtribe"    "namothr"     "urbnrrl"     "ststr"       "impsex"     
## [101] "rfhlth"      "phys14d"     "ment14d"     "hcvu651"     "rfhype5"    
## [106] "cholch1"     "rfchol1"     "michd"       "ltasth1"     "casthm1"    
## [111] "asthms1"     "drdxar1"     "lmtact1"     "lmtwrk1"     "lmtscl1"    
## [116] "prace1"      "mrace1"      "hispanc"     "race"        "raceg21"    
## [121] "racegr3"     "ageg5yr"     "age65yr"     "age80"       "ageg"       
## [126] "wtkg3"       "bmi5"        "bmi5cat"     "rfbmi5"      "educag"     
## [131] "incomg"      "smoker3"     "rfsmok3"     "ecigsts"     "curecig"    
## [136] "drnkany5"    "rfbing5"     "drnkwek"     "rfdrhv5"     "ftjuda2"    
## [141] "frutda2"     "grenda1"     "frnchda"     "potada1"     "vegeda2"    
## [146] "misfrt1"     "misveg1"     "frtres1"     "vegres1"     "frutsu1"    
## [151] "vegesu1"     "frtlt1a"     "veglt1a"     "frt16a"      "veg23a"     
## [156] "fruite1"     "vegete1"     "totinda"     "minac11"     "minac21"    
## [161] "pacat1"      "paindx1"     "pa150r2"     "pa300r2"     "pa30021"    
## [166] "pastrng"     "parec1"      "pastae1"     "rfseat2"     "rfseat3"    
## [171] "flshot6"     "pneumo2"     "aidtst3"     "mmsa"        "mmsawt"     
## [176] "seqno"       "mmsaname"    "educ"        "ins"         "marst"      
## [181] "badhealth"   "employ"      "agec"        "smoke"       "employa"    
## [186] "bmi"         "black"       "white"       "other"       "hispanic"   
## [191] "race_eth"    "healthdys"   "obese"       "male"        "healthdays" 
## [196] "healthmdays"

View(brfss_17)

#1 Conduct a multi-level analysis where you: Estimate the random intercept model with just the higher level units as your predictor (NULL model) report the variance components and ICC.

library(lme4)

## Registered S3 methods overwritten by 'lme4':
##   method                          from
##   cooks.distance.influence.merMod car 
##   influence.merMod                car 
##   dfbeta.influence.merMod         car 
##   dfbetas.influence.merMod        car

library(lmerTest)

## Warning: package 'lmerTest' was built under R version 4.0.5

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(arm)

## Warning: package 'arm' was built under R version 4.0.3

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## 
## arm (Version 1.11-2, built: 2020-7-27)

## Working directory is C:/Users/codar/OneDrive/Documents/Stats II/Data

## 
## Attaching package: 'arm'

## The following object is masked from 'package:car':
## 
##     logit

install.packages('lmerTest')

## Warning: package 'lmerTest' is in use and will not be installed

merged<-brfss_17%>%
  dplyr::select( obese, mmsa, sex, agec, educ, race_eth,smoke, badhealth,bmi, mmsawt, mmsaname, healthdays )%>%
  filter(complete.cases(.))

fit.an<-lm(healthdays~as.factor(mmsa),
           merged)
anova(fit.an)

## Analysis of Variance Table
## 
## Response: healthdays
##                     Df   Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(mmsa)    135    84087  622.87  8.5056 < 2.2e-16 ***
## Residuals       198358 14525898   73.23                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Using just the higher level model (mmsa), my F value is higher than a 1 signaling that there is a large variance. My P-value is significantly small (<2.2e-16) as well. 

library(lme4)
library(lmerTest)
library(arm)

fit<-lmer(healthdays~(1|mmsa),
           data=merged)
arm::display(fit,
             detail=T)

## lmer(formula = healthdays ~ (1 | mmsa), data = merged)
##             coef.est coef.se t value
## (Intercept)  4.30     0.06   68.61  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.68    
##  Residual             8.56    
## ---
## number of obs: 198494, groups: mmsa, 136
## AIC = 1.41585e+06, DIC = 1415839
## deviance = 1415842.3

#manual calculation
(.68^2)/(8.56^2+.68^2)

## [1] 0.006271021

performance::icc(fit)

## # Intraclass Correlation Coefficient
## 
##      Adjusted ICC: 0.006
##   Conditional ICC: 0.006

#Since the ICC can interpret the expected correlation between two randomly drawn units that are in the same group, the ICC for fit tells us that values from the same group are not similar. To use an example, using mmsa, there are low chances of selecting identical individuals.

#2 Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual - level predictors. Report the variance components of the model and describe the results of the model. Report the results for the fixed effects,

fit2<-lmer(healthdays~sex+agec+educ+(1|mmsa),
           data=merged,
           na.action=na.omit)
arm::display(fit2, detail=T)

## lmer(formula = healthdays ~ sex + agec + educ + (1 | mmsa), data = merged, 
##     na.action = na.omit)
##              coef.est coef.se t value
## (Intercept)    5.20     0.12   43.59 
## sexMale       -0.68     0.04  -17.88 
## agec(24,39]    1.00     0.09   11.21 
## agec(39,59]    2.56     0.08   30.69 
## agec(59,79]    3.10     0.08   37.58 
## agec(79,99]    3.31     0.10   32.33 
## educ2hsgrad   -2.11     0.09  -23.78 
## educ3somecol  -2.56     0.09  -29.16 
## educ4colgrad  -4.28     0.09  -50.33 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.49    
##  Residual             8.41    
## ---
## number of obs: 198494, groups: mmsa, 136
## AIC = 1.40905e+06, DIC = 1408955
## deviance = 1408990.7

performance::icc(fit2)

## # Intraclass Correlation Coefficient
## 
##      Adjusted ICC: 0.003
##   Conditional ICC: 0.003

#Fit2 results show that males are less likely to have healthy days than women and each age category is more likely than the lower age category to report less healthy days. The question asked is: Now thinking about your physical health, which includes physical illness and injury, for how many days during the past 30 days was your physical health not good?. The (maybe ironic) result for age is that with higher educational attainment there are more likely to report more unhealthy days. Residual of 8.41 and the icc is now lower than it was for the original fit (.003 compared to .006), which is good because it implies increased variance.

#3 Compare the models from 1 and 2 using a likelihood ratio test (anova(fit1, fit2, test=”Chisq”)) . Does the inclusion of the individual level variables increase the overall model fit?

anova(fit, fit2, test="Chisq")

## refitting model(s) with ML (instead of REML)

## Data: merged
## Models:
## fit: healthdays ~ (1 | mmsa)
## fit2: healthdays ~ sex + agec + educ + (1 | mmsa)
##      npar     AIC     BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## fit     3 1415848 1415879 -707921  1415842                         
## fit2   11 1409013 1409125 -704495  1408991 6851.6  8  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Fit2 has a slightly lower AIC than fit. Chi square for fit2 is statistically  significant with a p-value of <2.2e-16. THe inclusion of the individual level does increase the overall model fit. 

rand(fit2) #shows significant variation in healthy days across cities

## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## healthdays ~ sex + agec + educ + (1 | mmsa)
##            npar  logLik     AIC   LRT Df Pr(>Chisq)    
## <none>       11 -704513 1409049                        
## (1 | mmsa)   10 -704664 1409349 302.2  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#using the LRT, we see that 

  library(sjstats); library(sjPlot)

## 
## Attaching package: 'sjstats'

## The following object is masked from 'package:survey':
## 
##     cv

## The following object is masked from 'package:questionr':
## 
##     prop

## Install package "strengejacke" from GitHub (`devtools::install_github("strengejacke/strengejacke")`) to load all sj-packages at once!

icc(fit2)

## Warning: 'icc' is deprecated.
## Use 'performance::icc()' instead.
## See help("Deprecated")

## # Intraclass Correlation Coefficient
## 
##      Adjusted ICC: 0.003
##   Conditional ICC: 0.003

plot_model(fit,
           type = "re",
           sort.est="sort.all",
           grid=F)

## Warning: `select_vars()` is deprecated as of dplyr 0.8.4.
## Please use `tidyselect::vars_select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

HW 10

Coda Rayo-Garza

4/26/2021