Homework 6

library(sjPlot)

## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.13
## Current Matrix version is 1.2.12
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package

## #refugeeswelcome

library(lme4)

## Loading required package: Matrix

library(haven)
library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)
library(broom)
library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.1. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)
library(pander)
library(lmerTest)

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(arm)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## 
## arm (Version 1.9-3, built: 2016-11-21)

## Working directory is C:/Users/tobik/OneDrive/Documents/DEM7383_Stats

## 
## Attaching package: 'arm'

## The following object is masked from 'package:car':
## 
##     logit

#importing data
load("C:/Users/tobik/OneDrive/Documents/DEM7383_Stats/brfss16.Rdata")
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss16)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement =  "",x =  nams)
names(brfss16)<-tolower(newnames)

Checking where to find missing values:

colSums(is.na(brfss16))

##    state   fmonth    idate   imonth     iday    iyear dispcode    seqno 
##        0        0        0        0        0        0        0        0 
##      psu ctelenm1 pvtresd1 colghous stateres cellfon4   ladult numadult 
##        0   233290   234038   486262   236709   234038   486262   234051 
##   nummen numwomen ctelnum1 cellfon5   cadult pvtresd3 cclghous  cstate1 
##   234046   234046   252265   252266   252271   252265   485175   246342 
## landline  hhadult  genhlth physhlth menthlth poorhlth hlthpln1 persdoc2 
##   251640   260001        5        3        1   235696        0        1 
##  medcost checkup1 exerany2 sleptim1 cvdinfr4 cvdcrhd4 cvdstrk3  asthma3 
##        2        3        6        2        3        2        2        2 
##  asthnow chcscncr chcocncr chccopd1 havarth3 addepev2 chckidny diabete3 
##   420713        2        2        2        3        3        3        3 
## diabage2 lastden3 rmvteth3      sex  marital    educa renthom1 numhhol2 
##   420251        6        4        0       11        6        9   234055 
## numphon2  cpdemo1 veteran3  employ1 children  income2 internet  weight2 
##   471160   234059       65       73      121     3786     4920     6178 
##  height3 pregnant     deaf    blind   decide diffwalk diffdres diffalon 
##     6809   416898    12124    12987    13703    14310    14678    15267 
## smoke100 smokday2 stopsmk2 lastsmk2  usenow3 ecigaret  ecignow  alcday5 
##    16062   282147   417415   351519    16686    17418   411720    18348 
## avedrnk2 drnk3ge5 maxdrnks flushot6 flshtmy2 pneuvac3  tetanus fall12mn 
##   249223   249639   250034    22107   281847    23076    23573   146679 
## fallinj2 seatbelt drnkdri2   hadmam  howlong  hadpap2 lastpap2  hpvtest 
##   383838    25167   247187   225296   279045   225503   244482   226142 
## hplsttst hadhyst2 pcpsaad2 pcpsadi1 pcpsare1 psatest1  psatime pcpsars1 
##   419058   271408   334583   334711   334772   334840   398978   399044 
## bldstool lstblds3 hadsigm3 hadsgco1 lastsig3  hivtst6 hivtstd3 hivrisk4 
##   181611   381928   181924   264494   264557    32982   350195    34235 
## pdiabtst prediab1  insulin bldsugar feetchk2 doctdiab chkhemo3  feetchk 
##   294693   294692   476768   476768   476768   476769   476768   476814 
##  eyeexam  diabeye  diabedu painact2 qlmentl2 qlstres2  qlhlth2 medicare 
##   476768   476768   476768   476964   476971   476979   476989   432666 
## hlthcvr1 delaymed dlyother nocov121 lstcovrg drvisits medscost carercvd 
##   432667   429001        0   432668   482616   428980   428980   428980 
## medbill1 medadvic undrstnd  written caregiv1 crgvrel1 crgvlng1 crgvhrs1 
##   428980   395078   395157   395297   409710   471117   471137   471165 
## crgvprb2 crgvpers crgvhous crgvmst2 crgvexpt cimemlos  cdhouse cdassist 
##   471192   471216   471231   471264   425077   410160   477890   477897 
##   cdhelp cdsocial cddiscus ssbsugr2 ssbfrut2 calrinfo marijana usemrjna 
##   483805   477903   477913   418152   418236   474791   379483   480901 
## asthmage asattack aservist asdrvist asrchkup asactlim asymptom asnoslep 
##   485748   485931   486129   486130   485932   485933   485936   486041 
## asthmed3 asinhalr imfvplac hpvadvc2 hpvadsht shingle2 numburn2 cncrdiff 
##   485937   485937   470779   462620   483878   468928   462908   477896 
##  cncrage cncrtyp1 csrvtrt1 csrvdoc1  csrvsum csrvrtrn csrvinst csrvinsr 
##   478004   478012   478010   480151   480158   480161   482161   480165 
## csrvdein csrvclin csrvpain csrvctl1 profexam lengexam pcpsade1 pcdmdecn 
##   480166   480167   480170   485884   479759   480304   486303        0 
## sxorient trnsgndr rcsgendr rcsrltn2 casthdx2 casthno2 emtsuprt lsatisfy 
##   280869   281030   416701   416947   422065   478587   456766   456780 
## qlactlm2 useequip   qstver  qstlang   mscode    ststr    strwt  rawrake 
##   279078   279477        0       14   240028        0        0        0 
##  wt2rake  chispnc   crace1   cprace  cllcpwt  dualuse  dualcor  llcpwt2 
##        0   109413   412360   412360   416724        0   196720        0 
##   llcpwt   rfhlth  phys14d  ment14d  hcvu651  totinda    michd  ltasth1 
##        0        0        0        0        0        0     4125        0 
##  casthm1  asthms1  drdxar1  exteth2  alteth2  denvst2   prace1   mrace1 
##        0        0     2477        0   308872        0        4        4 
##  hispanc     race  raceg21  racegr3   raceg1  ageg5yr  age65yr    age80 
##        0        0        0        0     8683        0        0        0 
##     ageg    htin4     htm4    wtkg3     bmi5  bmi5cat   rfbmi5  chldcnt 
##        0    19133    16972    33530    39616    39616        0        0 
##   educag   incomg  smoker3  rfsmok3  ecigsts  curecig drnkany5  drocdy3 
##        0        0        0        0        0        0        0        0 
##  rfbing5  drnkwek  rfdrhv5  flshot6  pneumo2  rfseat2  rfseat3  drnkdrv 
##        0        0        0   308872   308872        0        0        0 
##  rfmam2y  mam5021  rfpap33  rfpsa21  rfblds3  col10yr  hfob3yr    fs5yr 
##   263226   354260   383917   334448   251512   256162   251512   375290 
##   fobtfs   crcrec  aidtst3 
##   272505   255147    32982

Checking on Drinking Bx

#Individual Levels

#continuous outcome: number of alcoholic drinks past 30 days
brfss16$numbofdrinks<-Recode(brfss16$avedrnk2, recodes="77=NA;99=NA", as.factor=F)

#income grouping
brfss16$inc<-Recode(brfss16$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor= T)
brfss16$inc<-as.ordered(brfss16$inc)


#education level
brfss16$educ<-Recode(brfss16$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)

#race/ethnicity
brfss16$raceeth<-Recode(brfss16$racegr3, recodes="1='1_White';  2='2_Black'; 5='3_Hispanic'; 3:4='4_other'; else=NA",as.factor=T)

#Higher Level

#mscode density of living arrangements
brfss16$urbanstyle<-Recode(brfss16$mscode, recodes=
            "1='Inner_City';
            2='City_Skirts';
            3='City_Couty';
            5='Rural_County';
            else=NA",
            as.factor=T)
brfss162<-brfss16%>%
  dplyr::select(numbofdrinks,inc,educ,raceeth,urbanstyle)%>%
  filter(complete.cases(.))

ANOVA

fit.anova<-lm(numbofdrinks~as.factor(urbanstyle), data=brfss16)
anova(fit.anova)

## Analysis of Variance Table
## 
## Response: numbofdrinks
##                           Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(urbanstyle)      3    547 182.424  45.344 < 2.2e-16 ***
## Residuals             110753 445569   4.023                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Yay, I found significance.

2) Estimating the multilevel

brfss16.fit<-lmer(numbofdrinks~inc+educ+raceeth+(1|urbanstyle), brfss16)
rand(brfss16.fit)

## Analysis of Random effects Table:
##            Chi.sq Chi.DF p.value    
## urbanstyle   53.2      1   3e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Without individual level variables

display(brfss16.fit, detail=T)

## lme4::lmer(formula = numbofdrinks ~ inc + educ + raceeth + (1 | 
##     urbanstyle), data = brfss16)
##                   coef.est coef.se t value
## (Intercept)         2.72     0.08   34.22 
## inc.L              -0.18     0.02   -8.20 
## inc.Q               0.17     0.02    8.04 
## inc.C              -0.02     0.02   -1.00 
## inc^4              -0.01     0.02   -0.59 
## educ1somehs        -0.08     0.09   -0.98 
## educ2hsgrad        -0.50     0.08   -6.70 
## educ3somecol       -0.74     0.08   -9.90 
## educ4colgrad       -0.98     0.08  -13.06 
## raceeth2_Black      0.05     0.03    1.73 
## raceeth3_Hispanic   0.46     0.04   12.90 
## raceeth4_other      0.50     0.03   14.42 
## 
## Error terms:
##  Groups     Name        Std.Dev.
##  urbanstyle (Intercept) 0.06    
##  Residual               2.00    
## ---
## number of obs: 95224, groups: urbanstyle, 4
## AIC = 402470, DIC = 402311
## deviance = 402376.3

Variance Components

#it can be a little hairy to get it, but it can be done using various part of VarCorr()
ICC1<-VarCorr(brfss16.fit)$urbanstyle[1]/(VarCorr(brfss16.fit)$urbanstyle[1]+attr(VarCorr(brfss16.fit), "sc"))
ICC1

## [1] 0.001683403

So, the variation only explains less than 1% (~0.17%) in number of drinks according to the urban living space.

Random Intercept Model

brfss16.fit2<-lmer(numbofdrinks~inc+educ+raceeth+(raceeth|urbanstyle), brfss16)
display(brfss16.fit2, detail=T)

## lme4::lmer(formula = numbofdrinks ~ inc + educ + raceeth + (raceeth | 
##     urbanstyle), data = brfss16)
##                   coef.est coef.se t value
## (Intercept)         2.71     0.08   34.68 
## inc.L              -0.17     0.02   -7.87 
## inc.Q               0.17     0.02    7.90 
## inc.C              -0.02     0.02   -0.91 
## inc^4              -0.01     0.02   -0.57 
## educ1somehs        -0.08     0.09   -0.96 
## educ2hsgrad        -0.50     0.08   -6.63 
## educ3somecol       -0.74     0.08   -9.82 
## educ4colgrad       -0.97     0.08  -12.97 
## raceeth2_Black      0.03     0.07    0.40 
## raceeth3_Hispanic   0.44     0.05    9.30 
## raceeth4_other      0.47     0.15    3.06 
## 
## Error terms:
##  Groups     Name              Std.Dev. Corr              
##  urbanstyle (Intercept)       0.05                       
##             raceeth2_Black    0.13     -0.23             
##             raceeth3_Hispanic 0.06     -0.60  0.92       
##             raceeth4_other    0.30      0.67  0.57  0.20 
##  Residual                     2.00                       
## ---
## number of obs: 95224, groups: urbanstyle, 4
## AIC = 402414, DIC = 402243
## deviance = 402305.7

#it can be a little hairy to get it, but it can be done using various part of VarCorr()
ICC2<-VarCorr(brfss16.fit2)$urbanstyle[1]/(VarCorr(brfss16.fit2)$urbanstyle[1]+attr(VarCorr(brfss16.fit2), "sc"))
ICC2

## [1] 0.001311932

Here, the variation only explains also less than 1% (~0.13%) in number of drinks according to the urban living space.

anova(brfss16.fit,brfss16.fit2, test="Chisq")

## refitting model(s) with ML (instead of REML)

## Data: brfss16
## Models:
## object: numbofdrinks ~ inc + educ + raceeth + (1 | urbanstyle)
## ..1: numbofdrinks ~ inc + educ + raceeth + (raceeth | urbanstyle)
##        Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
## object 14 402404 402537 -201188   402376                             
## ..1    23 402352 402569 -201153   402306 70.662      9  1.129e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Conclusion:

The second model fits better (Random Intercept / Random Slope).