#load packages
library(car)
library(stargazer)
library(survey)
library(sjPlot)
library(ggplot2)
library(pander)
library(knitr)
library(tidycensus)
library(dplyr)
library(haven)
BRFSS19<-read_xpt("C:/Users/adolp/Desktop/Statistics 2/BRFSS/MMSA2019.XPT")
set.seed(1234)
samp<-sample(1:dim(BRFSS19)[1], size = 40000) #smaller sample for brevity
BRFSS19<-BRFSS19[samp,]
#Fix BRFSS names
nams<-names(BRFSS19)
head(BRFSS19, n=10)
## # A tibble: 10 x 179
##    DISPCODE STATERE1 CELPHONE LADULT1 COLGSEX LANDSEX RESPSLCT SAFETIME CADULT1
##       <dbl>    <dbl>    <dbl>   <dbl>   <dbl>   <dbl>    <dbl>    <dbl>   <dbl>
##  1     1100       NA       NA      NA      NA      NA       NA        1       1
##  2     1100        1        2       1      NA       1       NA       NA      NA
##  3     1100        1        2       1      NA      NA        1       NA      NA
##  4     1200       NA       NA      NA      NA      NA       NA        1       1
##  5     1100        1        2       1      NA       2       NA       NA      NA
##  6     1100       NA       NA      NA      NA      NA       NA        1       1
##  7     1200       NA       NA      NA      NA      NA       NA        1       1
##  8     1100       NA       NA      NA      NA      NA       NA        1       1
##  9     1100        1        2       1      NA       2       NA       NA      NA
## 10     1100        1        2       1      NA      NA        2       NA      NA
## # ... with 170 more variables: CELLSEX <dbl>, HHADULT <dbl>, SEXVAR <dbl>,
## #   GENHLTH <dbl>, PHYSHLTH <dbl>, MENTHLTH <dbl>, POORHLTH <dbl>,
## #   HLTHPLN1 <dbl>, PERSDOC2 <dbl>, MEDCOST <dbl>, CHECKUP1 <dbl>,
## #   BPHIGH4 <dbl>, BPMEDS <dbl>, CHOLCHK2 <dbl>, TOLDHI2 <dbl>, CHOLMED2 <dbl>,
## #   CVDINFR4 <dbl>, CVDCRHD4 <dbl>, CVDSTRK3 <dbl>, ASTHMA3 <dbl>,
## #   ASTHNOW <dbl>, CHCSCNCR <dbl>, CHCOCNCR <dbl>, CHCCOPD2 <dbl>,
## #   ADDEPEV3 <dbl>, CHCKDNY2 <dbl>, DIABETE4 <dbl>, DIABAGE3 <dbl>,
## #   HAVARTH4 <dbl>, ARTHEXER <dbl>, ARTHEDU <dbl>, LMTJOIN3 <dbl>,
## #   ARTHDIS2 <dbl>, JOINPAI2 <dbl>, MARITAL <dbl>, EDUCA <dbl>, RENTHOM1 <dbl>,
## #   NUMHHOL3 <dbl>, NUMPHON3 <dbl>, CPDEMO1B <dbl>, VETERAN3 <dbl>,
## #   EMPLOY1 <dbl>, CHILDREN <dbl>, INCOME2 <dbl>, WEIGHT2 <dbl>, HEIGHT3 <dbl>,
## #   PREGNANT <dbl>, DEAF <dbl>, BLIND <dbl>, DECIDE <dbl>, DIFFWALK <dbl>,
## #   DIFFDRES <dbl>, DIFFALON <dbl>, SMOKE100 <dbl>, SMOKDAY2 <dbl>,
## #   STOPSMK2 <dbl>, LASTSMK2 <dbl>, USENOW3 <dbl>, ALCDAY5 <dbl>,
## #   AVEDRNK3 <dbl>, DRNK3GE5 <dbl>, MAXDRNKS <dbl>, EXERANY2 <dbl>,
## #   EXRACT11 <dbl>, EXEROFT1 <dbl>, EXERHMM1 <dbl>, EXRACT21 <dbl>,
## #   EXEROFT2 <dbl>, EXERHMM2 <dbl>, STRENGTH <dbl>, FRUIT2 <dbl>,
## #   FRUITJU2 <dbl>, FVGREEN1 <dbl>, FRENCHF1 <dbl>, POTATOE1 <dbl>,
## #   VEGETAB2 <dbl>, FLUSHOT7 <dbl>, FLSHTMY3 <dbl>, TETANUS1 <dbl>,
## #   PNEUVAC4 <dbl>, HIVTST7 <dbl>, HIVTSTD3 <dbl>, HIVRISK5 <dbl>,
## #   _STSTR <dbl>, _IMPSEX <dbl>, CAGEG <dbl>, _RFHLTH <dbl>, _PHYS14D <dbl>,
## #   _MENT14D <dbl>, _HCVU651 <dbl>, _RFHYPE5 <dbl>, _CHOLCH2 <dbl>,
## #   _RFCHOL2 <dbl>, _MICHD <dbl>, _LTASTH1 <dbl>, _CASTHM1 <dbl>,
## #   _ASTHMS1 <dbl>, _DRDXAR2 <dbl>, _LMTACT2 <dbl>, _LMTWRK2 <dbl>, ...
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(BRFSS19)<-newnames
BRFSS19$male<-Recode(BRFSS19$sexvar, recodes ="1=1;2=0")
BRFSS19<-BRFSS19%>%
  select(mmsaname, mmsa,menthlth,male,drnkwk1,minac21) %>%
  filter(complete.cases(.))
head(data.frame(name=table(BRFSS19$mmsaname), 
                id=unique(BRFSS19$mmsa)))
##                                                    name.Var1 name.Freq    id
## 1                Aberdeen, SD, Micropolitan Statistical Area        68 33460
## 2       Aguadilla-Isabela, PR, Metropolitan Statistical Area        51 17820
## 3                   Akron, OH, Metropolitan Statistical Area        69 39300
## 4 Albany-Schenectady-Troy, NY, Metropolitan Statistical Area       105 36084
## 5             Albuquerque, NM, Metropolitan Statistical Area       208 28140
## 6               Anchorage, AK, Metropolitan Statistical Area       118 30100
#Number of MSAs 
length(table(BRFSS19$mmsa))
## [1] 136
fit.an<-lm(menthlth~as.factor(mmsa), BRFSS19)
anova(fit.an)
## Analysis of Variance Table
## 
## Response: menthlth
##                    Df   Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(mmsa)   135   438865  3250.8  2.2936 1.278e-15 ***
## Residuals       26285 37254455  1417.3                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lme4)
## Warning: package 'lme4' was built under R version 4.0.4
library(lmerTest)
library(arm)
fit<-lmer(menthlth~(1|mmsa),
           data=BRFSS19)
arm::display(fit,
             detail=T)
## lmer(formula = menthlth ~ (1 | mmsa), data = BRFSS19)
##             coef.est coef.se t value
## (Intercept)  60.75     0.38  159.62 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept)  3.11   
##  Residual             37.65   
## ---
## number of obs: 26421, groups: mmsa, 136
## AIC = 266808, DIC = 266802.1
## deviance = 266802.2
(3.11^2)/(37.65^2 + 3.11^2)
## [1] 0.006777002
fit2<-lmer(menthlth~male+drnkwk1+minac21+(1|mmsa),
           data=BRFSS19)
## Warning: Some predictor variables are on very different scales: consider
## rescaling

## Warning: Some predictor variables are on very different scales: consider
## rescaling
arm::display(fit2, detail=T)
## lmer(formula = menthlth ~ male + drnkwk1 + minac21 + (1 | mmsa), 
##     data = BRFSS19)
##             coef.est coef.se t value
## (Intercept)  57.48     0.45  127.82 
## male          6.62     0.46   14.29 
## drnkwk1       0.00     0.00    1.71 
## minac21       0.00     0.00    1.73 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept)  3.21   
##  Residual             37.49   
## ---
## number of obs: 26421, groups: mmsa, 136
## AIC = 266637, DIC = 266557.6
## deviance = 266591.1
(3.21^2)/(37.49^2 + 3.21^2)
## [1] 0.007277913
anova(fit, fit2)
## refitting model(s) with ML (instead of REML)
## Data: BRFSS19
## Models:
## fit: menthlth ~ (1 | mmsa)
## fit2: menthlth ~ male + drnkwk1 + minac21 + (1 | mmsa)
##      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## fit     3 266808 266833 -133401   266802                         
## fit2    6 266603 266652 -133296   266591 211.08  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Q1: Its own variance (MMSA) = 3.11 Between group variance (Residual) = 37.65 ICC = 0.007 Q2: Its own variance (MMSA) = 3.21 Between group variance (Residual) = 37.49 ICC = 0.007 Respondents identifying as a male had significantly higher average number of mental health day than females. The number of alcohol drinks during the week and the number of minutes of physical activity during the week did not significantly predict the number of mental health days. Q3: Yes, the model that includes individual level predictors fits the data a lot better than the null model based on the AIC value.