#packages

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(foreign)
#Load BRFSS
brfss2017=read.xport("~/Desktop/DataFolder/brfss.XPT ")
brfss2017%>%
  filter(complete.cases(.))
##   [1] X_STATE   FMONTH    IDATE     IMONTH    IDAY      IYEAR     DISPCODE 
##   [8] SEQNO     X_PSU     CTELENM1  PVTRESD1  COLGHOUS  STATERE1  CELLFON4 
##  [15] LADULT    NUMADULT  NUMMEN    NUMWOMEN  SAFETIME  CTELNUM1  CELLFON5 
##  [22] CADULT    PVTRESD3  CCLGHOUS  CSTATE1   LANDLINE  HHADULT   GENHLTH  
##  [29] PHYSHLTH  MENTHLTH  POORHLTH  HLTHPLN1  PERSDOC2  MEDCOST   CHECKUP1 
##  [36] BPHIGH4   BPMEDS    CHOLCHK1  TOLDHI2   CHOLMED1  CVDINFR4  CVDCRHD4 
##  [43] CVDSTRK3  ASTHMA3   ASTHNOW   CHCSCNCR  CHCOCNCR  CHCCOPD1  HAVARTH3 
##  [50] ADDEPEV2  CHCKIDNY  DIABETE3  DIABAGE2  LMTJOIN3  ARTHDIS2  ARTHSOCL 
##  [57] JOINPAI1  SEX       MARITAL   EDUCA     RENTHOM1  NUMHHOL2  NUMPHON2 
##  [64] CPDEMO1A  VETERAN3  EMPLOY1   CHILDREN  INCOME2   INTERNET  WEIGHT2  
##  [71] HEIGHT3   PREGNANT  DEAF      BLIND     DECIDE    DIFFWALK  DIFFDRES 
##  [78] DIFFALON  SMOKE100  SMOKDAY2  STOPSMK2  LASTSMK2  USENOW3   ECIGARET 
##  [85] ECIGNOW   ALCDAY5   AVEDRNK2  DRNK3GE5  MAXDRNKS  FRUIT2    FRUITJU2 
##  [92] FVGREEN1  FRENCHF1  POTATOE1  VEGETAB2  EXERANY2  EXRACT11  EXEROFT1 
##  [99] EXERHMM1  EXRACT21  EXEROFT2  EXERHMM2  STRENGTH  SEATBELT  FLUSHOT6 
## [106] FLSHTMY2  PNEUVAC3  SHINGLE2  HIVTST6   HIVTSTD3  HIVRISK5  PDIABTST 
## [113] PREDIAB1  INSULIN   BLDSUGAR  FEETCHK2  DOCTDIAB  CHKHEMO3  FEETCHK  
## [120] EYEEXAM   DIABEYE   DIABEDU   COPDCOGH  COPDFLEM  COPDBRTH  COPDBTST 
## [127] COPDSMOK  HAREHAB1  STREHAB1  CVDASPRN  ASPUNSAF  RLIVPAIN  RDUCHART 
## [134] RDUCSTRK  BPEATHBT  BPSALT    BPALCHOL  BPEXER    BPEATADV  BPSLTADV 
## [141] BPALCADV  BPEXRADV  BPMEDADV  BPHI2MR   ARTTODAY  ARTHWGT   ARTHEXER 
## [148] ARTHEDU   ASTHMAGE  ASATTACK  ASERVIST  ASDRVIST  ASRCHKUP  ASACTLIM 
## [155] ASYMPTOM  ASNOSLEP  ASTHMED3  ASINHALR  PAINACT2  QLMENTL2  QLSTRES2 
## [162] QLHLTH2   SLEPTIM1  ADSLEEP   SLEPDAY1  SLEPSNO2  SLEPBRTH  MEDICARE 
## [169] HLTHCVR1  DELAYMED  DLYOTHER  NOCOV121  LSTCOVRG  DRVISITS  MEDSCOS1 
## [176] CARERCVD  MEDBILL1  ASBIALCH  ASBIDRNK  ASBIBING  ASBIADVC  ASBIRDUC 
## [183] CNCRDIFF  CNCRAGE   CNCRTYP1  CSRVTRT2  CSRVDOC1  CSRVSUM   CSRVRTRN 
## [190] CSRVINST  CSRVINSR  CSRVDEIN  CSRVCLIN  CSRVPAIN  CSRVCTL1  SSBSUGR2 
## [197] SSBFRUT3  WTCHSALT  DRADVISE  MARIJANA  USEMRJN1  RSNMRJNA  PFPPRVN2 
## [204] TYPCNTR7  NOBCUSE6  IMFVPLAC  HPVADVC2  HPVADSHT  TETANUS   LCSFIRST 
## [211] LCSLAST   LCSNUMCG  LCSCTSCN  CAREGIV1  CRGVREL2  CRGVLNG1  CRGVHRS1 
## [218] CRGVPRB2  CRGVPERS  CRGVHOUS  CRGVMST2  CRGVEXPT  CIMEMLOS  CDHOUSE  
## [225] CDASSIST  CDHELP    CDSOCIAL  CDDISCUS  EMTSUPRT  LSATISFY  SDHBILLS 
## [232] SDHMOVE   HOWSAFE1  SDHFOOD   SDHMEALS  SDHMONEY  SDHSTRES  SXORIENT 
## [239] TRNSGNDR  FIREARM4  GUNLOAD   LOADULK2  RCSGENDR  RCSRLTN2  CASTHDX2 
## [246] CASTHNO2  QSTVER    QSTLANG   MSCODE    X_STSTR   X_STRWT   X_RAWRAKE
## [253] X_WT2RAKE X_IMPRACE X_CHISPNC X_CRACE1  X_CPRACE  X_CLLCPWT X_DUALUSE
## [260] X_DUALCOR X_LLCPWT2 X_LLCPWT  X_RFHLTH  X_PHYS14D X_MENT14D X_HCVU651
## [267] X_RFHYPE5 X_CHOLCH1 X_RFCHOL1 X_MICHD   X_LTASTH1 X_CASTHM1 X_ASTHMS1
## [274] X_DRDXAR1 X_LMTACT1 X_LMTWRK1 X_LMTSCL1 X_PRACE1  X_MRACE1  X_HISPANC
## [281] X_RACE    X_RACEG21 X_RACEGR3 X_RACE_G1 X_AGEG5YR X_AGE65YR X_AGE80  
## [288] X_AGE_G   HTIN4     HTM4      WTKG3     X_BMI5    X_BMI5CAT X_RFBMI5 
## [295] X_CHLDCNT X_EDUCAG  X_INCOMG  X_SMOKER3 X_RFSMOK3 X_ECIGSTS X_CURECIG
## [302] DRNKANY5  DROCDY3_  X_RFBING5 X_DRNKWEK X_RFDRHV5 FTJUDA2_  FRUTDA2_ 
## [309] GRENDA1_  FRNCHDA_  POTADA1_  VEGEDA2_  X_MISFRT1 X_MISVEG1 X_FRTRES1
## [316] X_VEGRES1 X_FRUTSU1 X_VEGESU1 X_FRTLT1A X_VEGLT1A X_FRT16A  X_VEG23A 
## [323] X_FRUITE1 X_VEGETE1 X_TOTINDA METVL11_  METVL21_  MAXVO2_   FC60_    
## [330] ACTIN11_  ACTIN21_  PADUR1_   PADUR2_   PAFREQ1_  PAFREQ2_  X_MINAC11
## [337] X_MINAC21 STRFREQ_  PAMISS1_  PAMIN11_  PAMIN21_  PA1MIN_   PAVIG11_ 
## [344] PAVIG21_  PA1VIGM_  X_PACAT1  X_PAINDX1 X_PA150R2 X_PA300R2 X_PA30021
## [351] X_PASTRNG X_PAREC1  X_PASTAE1 X_RFSEAT2 X_RFSEAT3 X_FLSHOT6 X_PNEUMO2
## [358] X_AIDTST3
## <0 rows> (or 0-length row.names)
options(survey.lonely.psu = "adjust")
samp<-sample(1:dim(brfss2017)[1],size = 25000)
brfss2017<-brfss2017[samp,]
#Recode variables 
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
brfss2017$activity<-Recode(brfss2017$PA1MIN_,recodes="1500:99999=NA")
brfss2017$activity1<-Recode(brfss2017$activity,recodes="0:299=0;300:1499=1")
#Marital Status; Are you? (marital status): 1 Married, 2 Divorced, 3 Widowed, 4 Seprarated, 5 Never married, 6 Partner, 9 Refuse
brfss2017$marital<-Recode(brfss2017$MARITAL,recodes="1='b-Married';5='a-Single';2:4='c-D/W/S';6='partner';else=NA",as.factor=T)
#Sexual Orientation; Do you consider yourself to be? (sexual orientation): 1 Straight, 2 Gay, 3 Bisexual, 4 Other, 7 Don't know, 9 Refuse
brfss2017$sxorient<-Recode(brfss2017$SXORIENT,recodes="1='a-Straight';2='b-Gay';3='Bisexual';4:7='Other';else=NA",as.factor=T)
#Income 
brfss2017$income<-Recode(brfss2017$INCOME2,recodes="1:3='<$20k';4:5='$20k<$35k';6='35k<50k';7='50k<75';8='>75k';else=NA",as.factor=T)
#Age
brfss2017$age<-Recode(brfss2017$X_AGE_G, recodes="1='18-24';2='25-34';3='35-44';4='45-54';5='55-64';6='65+';else=NA",as.factor=T)
#Education
brfss2017$educ<-Recode(brfss2017$EDUCA, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss2017$smoke<-Recode(brfss2017$SMOKDAY2,recodes="1:2='Y';3='N';else=NA",as.factor=T)
#Was there a time in the past 12 months when you needed to see a doctor but could not because of cost? 1 Yes, 2 No, 7 Don't know, 9 Refuse
brfss2017$medcost<-Recode(brfss2017$MEDCOST,recodes="1='Y';2='N';else=NA",as.factor=T)
#During the past 30 days, on the days when you drank, about how many drinks did you drink on the average?
brfss2017$drinks<-Recode(brfss2017$AVEDRNK2,recodes="1:5='1-5';5:10='5-10';10:30='10-30';77=NA;99=NA;else=NA",as.factor=T)
#BMI; Four-categories of Body Mass Index (BMI): 1 Underweight, 2 Normal Weight, 3 Overweight, 4 Obese
brfss2017$bmi<-brfss2017$X_BMI5CAT
#Not good mental health days: 1 (0 days), 2 (1-13 days), 3 (14-30 days), 9 Don't know
brfss2017$badmental<-Recode(brfss2017$X_MENT14D,recodes="1=0;2:3=1;9=NA;else=NA",as.factor=T)
#Race/Ethnicity
brfss2017$black<-Recode(brfss2017$racegr3, recodes="2=1; 9=NA; else=0")
brfss2017$white<-Recode(brfss2017$racegr3, recodes="1=1; 9=NA; else=0")
brfss2017$other<-Recode(brfss2017$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss2017$hispanic<-Recode(brfss2017$racegr3, recodes="5=1; 9=NA; else=0")
#Race/Ethnicity; 1 White, 2 Black, 3 Other, 4 Multiracial, 5 Hispanic, 9 Don't know
brfss2017$race<-Recode(brfss2017$X_RACEGR3, recodes="2='black'; 1='awhite'; 3:4='other';5='hispanic'; else=NA",as.factor=T)
brfss2017$race<-relevel(brfss2017$race, ref='awhite')
#Sex; 1 Male, 2 Female
brfss2017$sex<-Recode(brfss2017$SEX,recodes="1=1;2=0;9=NA;else=NA",as.factor=T)
brfss2017$genh<-Recode(brfss2017$GENHLTH,recodes="1:3='aGood';4:5='Bad';else=NA",as.factor=T)
brfss2017$physh<-Recode(brfss2017$PHYSHLTH,recodes="88=0;77=NA;99=NA",as.factor=T)
brfss2017$menth<-Recode(brfss2017$MENTHLTH,recodes="88=0;77=NA;99=NA",as.factor=T)
brfss2017$emosupport<-Recode(brfss2017$EMTSUPRT,recodes="1:2='a-Yes';3='b-Sometimes';4:5='c-No';else=NA",as.factor=T)
brfss2017$satisfed<-Recode(brfss2017$LSATISFY,recodes="1:2='a-Satisfied';3:4='b-Dissatisfied';else=NA",as.factor=T)
brfss2017$employ<-Recode(brfss2017$EMPLOY1,recodes="1:2='Yes';3:4='No';5:8='other';else=NA",as.factor=T)
brfss2017$state<-brfss2017$X_STATE
summary(brfss2017$activity)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0   120.0   240.0   343.1   476.0  1498.0    9665
brfss2017 <- brfss2017 %>% 
  select(activity,activity1,marital,sxorient,age,educ,state)%>%
  filter(complete.cases(.))
library(pander)
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Warning: package 'survey' was built under R version 3.5.2
## Loading required package: grid
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.5.2
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(sjPlot)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.15
## Current Matrix version is 1.2.16
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## #refugeeswelcome
library(ggplot2)
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.5.2
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.2
#Basic fixed-effects ANOVA to see if there is any variation in higher level units.
fit.an<-lm(activity~as.factor(state), brfss2017)
anova(fit.an)
## Analysis of Variance Table
## 
## Response: activity
##                    Df    Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(state)   27   5713079  211596  2.2004 0.0003287 ***
## Residuals        7190 691418818   96164                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Use GLM for non-normal outcomes
fit.act<-glm(activity1~as.factor(state),family=binomial,brfss2017)
anova(fit.act,test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: activity1
## 
## Terms added sequentially (first to last)
## 
## 
##                  Df Deviance Resid. Df Resid. Dev Pr(>Chi)   
## NULL                              7217     9913.4            
## as.factor(state) 27   49.064      7190     9864.3 0.005825 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Fit the hierarchical model
library(lme4)
## Warning: package 'lme4' was built under R version 3.5.2
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 3.5.2
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(arm)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is /Users/sdw227/Desktop/SPRING 2019/STATISTICS/homework/multi level models
## 
## Attaching package: 'arm'
## The following object is masked from 'package:car':
## 
##     logit

1) Estimate the random intercept model with just the higher level units as your predictor (NULL model)

# 1) Estimate the random intercept model with just the higher level units as your predictor (NULL model)
fit<-glmer(activity~(1|state),data = brfss2017,na.action=na.omit)
## Warning in glmer(activity ~ (1 | state), data = brfss2017, na.action
## = na.omit): calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
summary(fit)
## Linear mixed model fit by REML ['lmerMod']
## Formula: activity ~ (1 | state)
##    Data: brfss2017
## 
## REML criterion at convergence: 103317.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1983 -0.7324 -0.3053  0.4073  3.7430 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  state    (Intercept)   456.6   21.37  
##  Residual             96171.1  310.11  
## Number of obs: 7218, groups:  state, 28
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  343.360      5.625   61.04
arm::display(fit,detail=T)
## lme4::lmer(formula = activity ~ (1 | state), data = brfss2017, 
##     na.action = na.omit)
##             coef.est coef.se t value
## (Intercept) 343.36     5.63   61.04 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  state    (Intercept)  21.37  
##  Residual             310.11  
## ---
## number of obs: 7218, groups: state, 28
## AIC = 103323, DIC = 103327.8
## deviance = 103322.6
##1a) Report the variance components and ICC.  
#  Within group variance is large.  The between group variance seems reasonable.  
library(sjstats)
## Warning: package 'sjstats' was built under R version 3.5.2
## 
## Attaching package: 'sjstats'
## The following objects are masked from 'package:survey':
## 
##     cv, deff
re_var(fit)
##       Within-group-variance: 96171.065
##      Between-group-variance:  456.577 (state)
meanact<-mean(brfss2017$activity,na.rm=T)
sdact<-sd(brfss2017$activity,na.rm=T)
brfss2017$predact<-sdact*(fitted(fit))+meanact
citymeans<-aggregate(cbind(activity,predact)~state,brfss2017,mean)
head(citymeans,n=10)
##    state activity   predact
## 1      6 384.0847 114444.58
## 2      9 356.7681 109609.68
## 3     10 334.0000 105949.83
## 4     12 351.4855 108946.61
## 5     13 263.2823  97837.70
## 6     15 374.4886 112441.31
## 7     17 349.8344 107937.89
## 8     18 307.4089  99703.25
## 9     19 292.1128  98306.69
## 10    22 325.3643 104826.88

2) Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual level predictors

# 2) Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual level predictors
fit2<-lmer(activity~marital+sxorient+age+educ+(1|state),data = brfss2017,na.action=na.omit)
arm::display(fit2,detail=T)
## lmer(formula = activity ~ marital + sxorient + age + educ + (1 | 
##     state), data = brfss2017, na.action = na.omit)
##                  coef.est coef.se t value
## (Intercept)      203.67    31.77    6.41 
## maritalb-Married  14.23    11.86    1.20 
## maritalc-D/W/S     1.98    13.36    0.15 
## maritalpartner     5.99    21.10    0.28 
## sxorientb-Gay     75.03    28.51    2.63 
## sxorientBisexual  14.64    28.26    0.52 
## sxorientOther    -79.46    36.18   -2.20 
## age25-34         -38.42    19.01   -2.02 
## age35-44         -27.08    19.98   -1.36 
## age45-54          -2.45    19.30   -0.13 
## age55-64          29.62    18.95    1.56 
## age65+            97.99    18.76    5.22 
## educ1somehs       86.75    33.64    2.58 
## educ2hsgrad      100.60    28.18    3.57 
## educ3somecol      98.87    28.10    3.52 
## educ4colgrad     103.77    27.81    3.73 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  state    (Intercept)  22.69  
##  Residual             305.52  
## ---
## number of obs: 7218, groups: state, 28
## AIC = 103016, DIC = 103208.8
## deviance = 103094.4
summary(fit2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: activity ~ marital + sxorient + age + educ + (1 | state)
##    Data: brfss2017
## 
## REML criterion at convergence: 102980
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4570 -0.7049 -0.2855  0.4133  3.9204 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  state    (Intercept)   514.7   22.69  
##  Residual             93344.0  305.52  
## Number of obs: 7218, groups:  state, 28
## 
## Fixed effects:
##                  Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)       203.668     31.770 5489.100   6.411 1.57e-10 ***
## maritalb-Married   14.231     11.862 7201.688   1.200 0.230298    
## maritalc-D/W/S      1.978     13.355 7201.186   0.148 0.882246    
## maritalpartner      5.991     21.103 7195.840   0.284 0.776504    
## sxorientb-Gay      75.028     28.512 7196.873   2.631 0.008520 ** 
## sxorientBisexual   14.640     28.262 7197.537   0.518 0.604466    
## sxorientOther     -79.458     36.177 7200.913  -2.196 0.028098 *  
## age25-34          -38.424     19.005 7193.960  -2.022 0.043241 *  
## age35-44          -27.081     19.982 7187.900  -1.355 0.175375    
## age45-54           -2.449     19.297 7195.211  -0.127 0.899011    
## age55-64           29.616     18.950 7198.338   1.563 0.118123    
## age65+             97.995     18.756 7199.856   5.225 1.79e-07 ***
## educ1somehs        86.752     33.643 7196.672   2.579 0.009940 ** 
## educ2hsgrad       100.601     28.178 7201.683   3.570 0.000359 ***
## educ3somecol       98.866     28.100 7201.381   3.518 0.000437 ***
## educ4colgrad      103.772     27.806 7201.981   3.732 0.000191 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it
# 2a Report the variance components of the model and describe the results.  
#  The within group variance is smaller then in the fit model, but it is still large.  The between group variance has dropped.

re_var(fit2)
##       Within-group-variance: 93344.004
##      Between-group-variance:  514.691 (state)

3) Compare the models from 1 and 2 using a likelihood ratio test to see if our individual level variables explain anything about the outcome.

# 3) Compare the models from 1 and 2 using a likelihood ratio test to see if our individual level variables explain anything about the outcome.
anova(fit,fit2,test="Chisq")
## refitting model(s) with ML (instead of REML)
## Data: brfss2017
## Models:
## fit: activity ~ (1 | state)
## fit2: activity ~ marital + sxorient + age + educ + (1 | state)
##      Df    AIC    BIC logLik deviance  Chisq Chi Df Pr(>Chisq)    
## fit   3 103329 103349 -51661   103323                             
## fit2 18 103130 103254 -51547   103094 228.15     15  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Test to see if the random effect term is significant - The inclusion of the individual level variables is significant and improves the models overall fit.  
rand(fit2)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## activity ~ marital + sxorient + age + educ + (1 | state)
##             npar logLik    AIC    LRT Df Pr(>Chisq)    
## <none>        18 -51490 103016                         
## (1 | state)   17 -51497 103028 13.873  1  0.0001956 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1