library(sjPlot)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.13
## Current Matrix version is 1.2.12
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## #refugeeswelcome
library(lme4)
## Loading required package: Matrix
library(haven)
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(broom)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.1. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(pander)
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(arm)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## arm (Version 1.9-3, built: 2016-11-21)
## Working directory is C:/Users/tobik/OneDrive/Documents/DEM7383_Stats
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
#importing data
load("C:/Users/tobik/OneDrive/Documents/DEM7383_Stats/brfss16.Rdata")
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss16)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",replacement = "",x = nams)
names(brfss16)<-tolower(newnames)
Checking where to find missing values:
colSums(is.na(brfss16))
## state fmonth idate imonth iday iyear dispcode seqno
## 0 0 0 0 0 0 0 0
## psu ctelenm1 pvtresd1 colghous stateres cellfon4 ladult numadult
## 0 233290 234038 486262 236709 234038 486262 234051
## nummen numwomen ctelnum1 cellfon5 cadult pvtresd3 cclghous cstate1
## 234046 234046 252265 252266 252271 252265 485175 246342
## landline hhadult genhlth physhlth menthlth poorhlth hlthpln1 persdoc2
## 251640 260001 5 3 1 235696 0 1
## medcost checkup1 exerany2 sleptim1 cvdinfr4 cvdcrhd4 cvdstrk3 asthma3
## 2 3 6 2 3 2 2 2
## asthnow chcscncr chcocncr chccopd1 havarth3 addepev2 chckidny diabete3
## 420713 2 2 2 3 3 3 3
## diabage2 lastden3 rmvteth3 sex marital educa renthom1 numhhol2
## 420251 6 4 0 11 6 9 234055
## numphon2 cpdemo1 veteran3 employ1 children income2 internet weight2
## 471160 234059 65 73 121 3786 4920 6178
## height3 pregnant deaf blind decide diffwalk diffdres diffalon
## 6809 416898 12124 12987 13703 14310 14678 15267
## smoke100 smokday2 stopsmk2 lastsmk2 usenow3 ecigaret ecignow alcday5
## 16062 282147 417415 351519 16686 17418 411720 18348
## avedrnk2 drnk3ge5 maxdrnks flushot6 flshtmy2 pneuvac3 tetanus fall12mn
## 249223 249639 250034 22107 281847 23076 23573 146679
## fallinj2 seatbelt drnkdri2 hadmam howlong hadpap2 lastpap2 hpvtest
## 383838 25167 247187 225296 279045 225503 244482 226142
## hplsttst hadhyst2 pcpsaad2 pcpsadi1 pcpsare1 psatest1 psatime pcpsars1
## 419058 271408 334583 334711 334772 334840 398978 399044
## bldstool lstblds3 hadsigm3 hadsgco1 lastsig3 hivtst6 hivtstd3 hivrisk4
## 181611 381928 181924 264494 264557 32982 350195 34235
## pdiabtst prediab1 insulin bldsugar feetchk2 doctdiab chkhemo3 feetchk
## 294693 294692 476768 476768 476768 476769 476768 476814
## eyeexam diabeye diabedu painact2 qlmentl2 qlstres2 qlhlth2 medicare
## 476768 476768 476768 476964 476971 476979 476989 432666
## hlthcvr1 delaymed dlyother nocov121 lstcovrg drvisits medscost carercvd
## 432667 429001 0 432668 482616 428980 428980 428980
## medbill1 medadvic undrstnd written caregiv1 crgvrel1 crgvlng1 crgvhrs1
## 428980 395078 395157 395297 409710 471117 471137 471165
## crgvprb2 crgvpers crgvhous crgvmst2 crgvexpt cimemlos cdhouse cdassist
## 471192 471216 471231 471264 425077 410160 477890 477897
## cdhelp cdsocial cddiscus ssbsugr2 ssbfrut2 calrinfo marijana usemrjna
## 483805 477903 477913 418152 418236 474791 379483 480901
## asthmage asattack aservist asdrvist asrchkup asactlim asymptom asnoslep
## 485748 485931 486129 486130 485932 485933 485936 486041
## asthmed3 asinhalr imfvplac hpvadvc2 hpvadsht shingle2 numburn2 cncrdiff
## 485937 485937 470779 462620 483878 468928 462908 477896
## cncrage cncrtyp1 csrvtrt1 csrvdoc1 csrvsum csrvrtrn csrvinst csrvinsr
## 478004 478012 478010 480151 480158 480161 482161 480165
## csrvdein csrvclin csrvpain csrvctl1 profexam lengexam pcpsade1 pcdmdecn
## 480166 480167 480170 485884 479759 480304 486303 0
## sxorient trnsgndr rcsgendr rcsrltn2 casthdx2 casthno2 emtsuprt lsatisfy
## 280869 281030 416701 416947 422065 478587 456766 456780
## qlactlm2 useequip qstver qstlang mscode ststr strwt rawrake
## 279078 279477 0 14 240028 0 0 0
## wt2rake chispnc crace1 cprace cllcpwt dualuse dualcor llcpwt2
## 0 109413 412360 412360 416724 0 196720 0
## llcpwt rfhlth phys14d ment14d hcvu651 totinda michd ltasth1
## 0 0 0 0 0 0 4125 0
## casthm1 asthms1 drdxar1 exteth2 alteth2 denvst2 prace1 mrace1
## 0 0 2477 0 308872 0 4 4
## hispanc race raceg21 racegr3 raceg1 ageg5yr age65yr age80
## 0 0 0 0 8683 0 0 0
## ageg htin4 htm4 wtkg3 bmi5 bmi5cat rfbmi5 chldcnt
## 0 19133 16972 33530 39616 39616 0 0
## educag incomg smoker3 rfsmok3 ecigsts curecig drnkany5 drocdy3
## 0 0 0 0 0 0 0 0
## rfbing5 drnkwek rfdrhv5 flshot6 pneumo2 rfseat2 rfseat3 drnkdrv
## 0 0 0 308872 308872 0 0 0
## rfmam2y mam5021 rfpap33 rfpsa21 rfblds3 col10yr hfob3yr fs5yr
## 263226 354260 383917 334448 251512 256162 251512 375290
## fobtfs crcrec aidtst3
## 272505 255147 32982
#Individual Levels
#continuous outcome: number of alcoholic drinks past 30 days
brfss16$numbofdrinks<-Recode(brfss16$avedrnk2, recodes="77=NA;99=NA", as.factor=F)
#income grouping
brfss16$inc<-Recode(brfss16$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor= T)
brfss16$inc<-as.ordered(brfss16$inc)
#education level
brfss16$educ<-Recode(brfss16$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
#race/ethnicity
brfss16$raceeth<-Recode(brfss16$racegr3, recodes="1='1_White'; 2='2_Black'; 5='3_Hispanic'; 3:4='4_other'; else=NA",as.factor=T)
#Higher Level
#mscode density of living arrangements
brfss16$urbanstyle<-Recode(brfss16$mscode, recodes=
"1='Inner_City';
2='City_Skirts';
3='City_Couty';
5='Rural_County';
else=NA",
as.factor=T)
brfss162<-brfss16%>%
dplyr::select(numbofdrinks,inc,educ,raceeth,urbanstyle)%>%
filter(complete.cases(.))
ANOVA
fit.anova<-lm(numbofdrinks~as.factor(urbanstyle), data=brfss16)
anova(fit.anova)
## Analysis of Variance Table
##
## Response: numbofdrinks
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(urbanstyle) 3 547 182.424 45.344 < 2.2e-16 ***
## Residuals 110753 445569 4.023
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Yay, I found significance.
brfss16.fit<-lmer(numbofdrinks~inc+educ+raceeth+(1|urbanstyle), brfss16)
rand(brfss16.fit)
## Analysis of Random effects Table:
## Chi.sq Chi.DF p.value
## urbanstyle 53.2 1 3e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Without individual level variables
display(brfss16.fit, detail=T)
## lme4::lmer(formula = numbofdrinks ~ inc + educ + raceeth + (1 |
## urbanstyle), data = brfss16)
## coef.est coef.se t value
## (Intercept) 2.72 0.08 34.22
## inc.L -0.18 0.02 -8.20
## inc.Q 0.17 0.02 8.04
## inc.C -0.02 0.02 -1.00
## inc^4 -0.01 0.02 -0.59
## educ1somehs -0.08 0.09 -0.98
## educ2hsgrad -0.50 0.08 -6.70
## educ3somecol -0.74 0.08 -9.90
## educ4colgrad -0.98 0.08 -13.06
## raceeth2_Black 0.05 0.03 1.73
## raceeth3_Hispanic 0.46 0.04 12.90
## raceeth4_other 0.50 0.03 14.42
##
## Error terms:
## Groups Name Std.Dev.
## urbanstyle (Intercept) 0.06
## Residual 2.00
## ---
## number of obs: 95224, groups: urbanstyle, 4
## AIC = 402470, DIC = 402311
## deviance = 402376.3
#it can be a little hairy to get it, but it can be done using various part of VarCorr()
ICC1<-VarCorr(brfss16.fit)$urbanstyle[1]/(VarCorr(brfss16.fit)$urbanstyle[1]+attr(VarCorr(brfss16.fit), "sc"))
ICC1
## [1] 0.001683403
So, the variation only explains less than 1% (~0.17%) in number of drinks according to the urban living space.
brfss16.fit2<-lmer(numbofdrinks~inc+educ+raceeth+(raceeth|urbanstyle), brfss16)
display(brfss16.fit2, detail=T)
## lme4::lmer(formula = numbofdrinks ~ inc + educ + raceeth + (raceeth |
## urbanstyle), data = brfss16)
## coef.est coef.se t value
## (Intercept) 2.71 0.08 34.68
## inc.L -0.17 0.02 -7.87
## inc.Q 0.17 0.02 7.90
## inc.C -0.02 0.02 -0.91
## inc^4 -0.01 0.02 -0.57
## educ1somehs -0.08 0.09 -0.96
## educ2hsgrad -0.50 0.08 -6.63
## educ3somecol -0.74 0.08 -9.82
## educ4colgrad -0.97 0.08 -12.97
## raceeth2_Black 0.03 0.07 0.40
## raceeth3_Hispanic 0.44 0.05 9.30
## raceeth4_other 0.47 0.15 3.06
##
## Error terms:
## Groups Name Std.Dev. Corr
## urbanstyle (Intercept) 0.05
## raceeth2_Black 0.13 -0.23
## raceeth3_Hispanic 0.06 -0.60 0.92
## raceeth4_other 0.30 0.67 0.57 0.20
## Residual 2.00
## ---
## number of obs: 95224, groups: urbanstyle, 4
## AIC = 402414, DIC = 402243
## deviance = 402305.7
#it can be a little hairy to get it, but it can be done using various part of VarCorr()
ICC2<-VarCorr(brfss16.fit2)$urbanstyle[1]/(VarCorr(brfss16.fit2)$urbanstyle[1]+attr(VarCorr(brfss16.fit2), "sc"))
ICC2
## [1] 0.001311932
Here, the variation only explains also less than 1% (~0.13%) in number of drinks according to the urban living space.
anova(brfss16.fit,brfss16.fit2, test="Chisq")
## refitting model(s) with ML (instead of REML)
## Data: brfss16
## Models:
## object: numbofdrinks ~ inc + educ + raceeth + (1 | urbanstyle)
## ..1: numbofdrinks ~ inc + educ + raceeth + (raceeth | urbanstyle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## object 14 402404 402537 -201188 402376
## ..1 23 402352 402569 -201153 402306 70.662 9 1.129e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The second model fits better (Random Intercept / Random Slope).