#packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(foreign)
#Load BRFSS
brfss2017=read.xport("~/Desktop/DataFolder/brfss.XPT ")
brfss2017%>%
filter(complete.cases(.))
## [1] X_STATE FMONTH IDATE IMONTH IDAY IYEAR DISPCODE
## [8] SEQNO X_PSU CTELENM1 PVTRESD1 COLGHOUS STATERE1 CELLFON4
## [15] LADULT NUMADULT NUMMEN NUMWOMEN SAFETIME CTELNUM1 CELLFON5
## [22] CADULT PVTRESD3 CCLGHOUS CSTATE1 LANDLINE HHADULT GENHLTH
## [29] PHYSHLTH MENTHLTH POORHLTH HLTHPLN1 PERSDOC2 MEDCOST CHECKUP1
## [36] BPHIGH4 BPMEDS CHOLCHK1 TOLDHI2 CHOLMED1 CVDINFR4 CVDCRHD4
## [43] CVDSTRK3 ASTHMA3 ASTHNOW CHCSCNCR CHCOCNCR CHCCOPD1 HAVARTH3
## [50] ADDEPEV2 CHCKIDNY DIABETE3 DIABAGE2 LMTJOIN3 ARTHDIS2 ARTHSOCL
## [57] JOINPAI1 SEX MARITAL EDUCA RENTHOM1 NUMHHOL2 NUMPHON2
## [64] CPDEMO1A VETERAN3 EMPLOY1 CHILDREN INCOME2 INTERNET WEIGHT2
## [71] HEIGHT3 PREGNANT DEAF BLIND DECIDE DIFFWALK DIFFDRES
## [78] DIFFALON SMOKE100 SMOKDAY2 STOPSMK2 LASTSMK2 USENOW3 ECIGARET
## [85] ECIGNOW ALCDAY5 AVEDRNK2 DRNK3GE5 MAXDRNKS FRUIT2 FRUITJU2
## [92] FVGREEN1 FRENCHF1 POTATOE1 VEGETAB2 EXERANY2 EXRACT11 EXEROFT1
## [99] EXERHMM1 EXRACT21 EXEROFT2 EXERHMM2 STRENGTH SEATBELT FLUSHOT6
## [106] FLSHTMY2 PNEUVAC3 SHINGLE2 HIVTST6 HIVTSTD3 HIVRISK5 PDIABTST
## [113] PREDIAB1 INSULIN BLDSUGAR FEETCHK2 DOCTDIAB CHKHEMO3 FEETCHK
## [120] EYEEXAM DIABEYE DIABEDU COPDCOGH COPDFLEM COPDBRTH COPDBTST
## [127] COPDSMOK HAREHAB1 STREHAB1 CVDASPRN ASPUNSAF RLIVPAIN RDUCHART
## [134] RDUCSTRK BPEATHBT BPSALT BPALCHOL BPEXER BPEATADV BPSLTADV
## [141] BPALCADV BPEXRADV BPMEDADV BPHI2MR ARTTODAY ARTHWGT ARTHEXER
## [148] ARTHEDU ASTHMAGE ASATTACK ASERVIST ASDRVIST ASRCHKUP ASACTLIM
## [155] ASYMPTOM ASNOSLEP ASTHMED3 ASINHALR PAINACT2 QLMENTL2 QLSTRES2
## [162] QLHLTH2 SLEPTIM1 ADSLEEP SLEPDAY1 SLEPSNO2 SLEPBRTH MEDICARE
## [169] HLTHCVR1 DELAYMED DLYOTHER NOCOV121 LSTCOVRG DRVISITS MEDSCOS1
## [176] CARERCVD MEDBILL1 ASBIALCH ASBIDRNK ASBIBING ASBIADVC ASBIRDUC
## [183] CNCRDIFF CNCRAGE CNCRTYP1 CSRVTRT2 CSRVDOC1 CSRVSUM CSRVRTRN
## [190] CSRVINST CSRVINSR CSRVDEIN CSRVCLIN CSRVPAIN CSRVCTL1 SSBSUGR2
## [197] SSBFRUT3 WTCHSALT DRADVISE MARIJANA USEMRJN1 RSNMRJNA PFPPRVN2
## [204] TYPCNTR7 NOBCUSE6 IMFVPLAC HPVADVC2 HPVADSHT TETANUS LCSFIRST
## [211] LCSLAST LCSNUMCG LCSCTSCN CAREGIV1 CRGVREL2 CRGVLNG1 CRGVHRS1
## [218] CRGVPRB2 CRGVPERS CRGVHOUS CRGVMST2 CRGVEXPT CIMEMLOS CDHOUSE
## [225] CDASSIST CDHELP CDSOCIAL CDDISCUS EMTSUPRT LSATISFY SDHBILLS
## [232] SDHMOVE HOWSAFE1 SDHFOOD SDHMEALS SDHMONEY SDHSTRES SXORIENT
## [239] TRNSGNDR FIREARM4 GUNLOAD LOADULK2 RCSGENDR RCSRLTN2 CASTHDX2
## [246] CASTHNO2 QSTVER QSTLANG MSCODE X_STSTR X_STRWT X_RAWRAKE
## [253] X_WT2RAKE X_IMPRACE X_CHISPNC X_CRACE1 X_CPRACE X_CLLCPWT X_DUALUSE
## [260] X_DUALCOR X_LLCPWT2 X_LLCPWT X_RFHLTH X_PHYS14D X_MENT14D X_HCVU651
## [267] X_RFHYPE5 X_CHOLCH1 X_RFCHOL1 X_MICHD X_LTASTH1 X_CASTHM1 X_ASTHMS1
## [274] X_DRDXAR1 X_LMTACT1 X_LMTWRK1 X_LMTSCL1 X_PRACE1 X_MRACE1 X_HISPANC
## [281] X_RACE X_RACEG21 X_RACEGR3 X_RACE_G1 X_AGEG5YR X_AGE65YR X_AGE80
## [288] X_AGE_G HTIN4 HTM4 WTKG3 X_BMI5 X_BMI5CAT X_RFBMI5
## [295] X_CHLDCNT X_EDUCAG X_INCOMG X_SMOKER3 X_RFSMOK3 X_ECIGSTS X_CURECIG
## [302] DRNKANY5 DROCDY3_ X_RFBING5 X_DRNKWEK X_RFDRHV5 FTJUDA2_ FRUTDA2_
## [309] GRENDA1_ FRNCHDA_ POTADA1_ VEGEDA2_ X_MISFRT1 X_MISVEG1 X_FRTRES1
## [316] X_VEGRES1 X_FRUTSU1 X_VEGESU1 X_FRTLT1A X_VEGLT1A X_FRT16A X_VEG23A
## [323] X_FRUITE1 X_VEGETE1 X_TOTINDA METVL11_ METVL21_ MAXVO2_ FC60_
## [330] ACTIN11_ ACTIN21_ PADUR1_ PADUR2_ PAFREQ1_ PAFREQ2_ X_MINAC11
## [337] X_MINAC21 STRFREQ_ PAMISS1_ PAMIN11_ PAMIN21_ PA1MIN_ PAVIG11_
## [344] PAVIG21_ PA1VIGM_ X_PACAT1 X_PAINDX1 X_PA150R2 X_PA300R2 X_PA30021
## [351] X_PASTRNG X_PAREC1 X_PASTAE1 X_RFSEAT2 X_RFSEAT3 X_FLSHOT6 X_PNEUMO2
## [358] X_AIDTST3
## <0 rows> (or 0-length row.names)
options(survey.lonely.psu = "adjust")
samp<-sample(1:dim(brfss2017)[1],size = 25000)
brfss2017<-brfss2017[samp,]
#Recode variables
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
brfss2017$activity<-Recode(brfss2017$PA1MIN_,recodes="1500:99999=NA")
brfss2017$activity1<-Recode(brfss2017$activity,recodes="0:299=0;300:1499=1")
#Marital Status; Are you? (marital status): 1 Married, 2 Divorced, 3 Widowed, 4 Seprarated, 5 Never married, 6 Partner, 9 Refuse
brfss2017$marital<-Recode(brfss2017$MARITAL,recodes="1='b-Married';5='a-Single';2:4='c-D/W/S';6='partner';else=NA",as.factor=T)
#Sexual Orientation; Do you consider yourself to be? (sexual orientation): 1 Straight, 2 Gay, 3 Bisexual, 4 Other, 7 Don't know, 9 Refuse
brfss2017$sxorient<-Recode(brfss2017$SXORIENT,recodes="1='a-Straight';2='b-Gay';3='Bisexual';4:7='Other';else=NA",as.factor=T)
#Income
brfss2017$income<-Recode(brfss2017$INCOME2,recodes="1:3='<$20k';4:5='$20k<$35k';6='35k<50k';7='50k<75';8='>75k';else=NA",as.factor=T)
#Age
brfss2017$age<-Recode(brfss2017$X_AGE_G, recodes="1='18-24';2='25-34';3='35-44';4='45-54';5='55-64';6='65+';else=NA",as.factor=T)
#Education
brfss2017$educ<-Recode(brfss2017$EDUCA, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss2017$smoke<-Recode(brfss2017$SMOKDAY2,recodes="1:2='Y';3='N';else=NA",as.factor=T)
#Was there a time in the past 12 months when you needed to see a doctor but could not because of cost? 1 Yes, 2 No, 7 Don't know, 9 Refuse
brfss2017$medcost<-Recode(brfss2017$MEDCOST,recodes="1='Y';2='N';else=NA",as.factor=T)
#During the past 30 days, on the days when you drank, about how many drinks did you drink on the average?
brfss2017$drinks<-Recode(brfss2017$AVEDRNK2,recodes="1:5='1-5';5:10='5-10';10:30='10-30';77=NA;99=NA;else=NA",as.factor=T)
#BMI; Four-categories of Body Mass Index (BMI): 1 Underweight, 2 Normal Weight, 3 Overweight, 4 Obese
brfss2017$bmi<-brfss2017$X_BMI5CAT
#Not good mental health days: 1 (0 days), 2 (1-13 days), 3 (14-30 days), 9 Don't know
brfss2017$badmental<-Recode(brfss2017$X_MENT14D,recodes="1=0;2:3=1;9=NA;else=NA",as.factor=T)
#Race/Ethnicity
brfss2017$black<-Recode(brfss2017$racegr3, recodes="2=1; 9=NA; else=0")
brfss2017$white<-Recode(brfss2017$racegr3, recodes="1=1; 9=NA; else=0")
brfss2017$other<-Recode(brfss2017$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss2017$hispanic<-Recode(brfss2017$racegr3, recodes="5=1; 9=NA; else=0")
#Race/Ethnicity; 1 White, 2 Black, 3 Other, 4 Multiracial, 5 Hispanic, 9 Don't know
brfss2017$race<-Recode(brfss2017$X_RACEGR3, recodes="2='black'; 1='awhite'; 3:4='other';5='hispanic'; else=NA",as.factor=T)
brfss2017$race<-relevel(brfss2017$race, ref='awhite')
#Sex; 1 Male, 2 Female
brfss2017$sex<-Recode(brfss2017$SEX,recodes="1=1;2=0;9=NA;else=NA",as.factor=T)
brfss2017$genh<-Recode(brfss2017$GENHLTH,recodes="1:3='aGood';4:5='Bad';else=NA",as.factor=T)
brfss2017$physh<-Recode(brfss2017$PHYSHLTH,recodes="88=0;77=NA;99=NA",as.factor=T)
brfss2017$menth<-Recode(brfss2017$MENTHLTH,recodes="88=0;77=NA;99=NA",as.factor=T)
brfss2017$emosupport<-Recode(brfss2017$EMTSUPRT,recodes="1:2='a-Yes';3='b-Sometimes';4:5='c-No';else=NA",as.factor=T)
brfss2017$satisfed<-Recode(brfss2017$LSATISFY,recodes="1:2='a-Satisfied';3:4='b-Dissatisfied';else=NA",as.factor=T)
brfss2017$employ<-Recode(brfss2017$EMPLOY1,recodes="1:2='Yes';3:4='No';5:8='other';else=NA",as.factor=T)
brfss2017$state<-brfss2017$X_STATE
summary(brfss2017$activity)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.0 120.0 240.0 343.1 476.0 1498.0 9665
brfss2017 <- brfss2017 %>%
select(activity,activity1,marital,sxorient,age,educ,state)%>%
filter(complete.cases(.))
library(pander)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Warning: package 'survey' was built under R version 3.5.2
## Loading required package: grid
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.5.2
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(sjPlot)
## Warning in checkMatrixPackageVersion(): Package version inconsistency detected.
## TMB was built with Matrix version 1.2.15
## Current Matrix version is 1.2.16
## Please re-install 'TMB' from source using install.packages('TMB', type = 'source') or ask CRAN for a binary version of 'TMB' matching CRAN's 'Matrix' package
## #refugeeswelcome
library(ggplot2)
library(tidycensus)
## Warning: package 'tidycensus' was built under R version 3.5.2
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.2
#Basic fixed-effects ANOVA to see if there is any variation in higher level units.
fit.an<-lm(activity~as.factor(state), brfss2017)
anova(fit.an)
## Analysis of Variance Table
##
## Response: activity
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(state) 27 5713079 211596 2.2004 0.0003287 ***
## Residuals 7190 691418818 96164
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Use GLM for non-normal outcomes
fit.act<-glm(activity1~as.factor(state),family=binomial,brfss2017)
anova(fit.act,test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: activity1
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 7217 9913.4
## as.factor(state) 27 49.064 7190 9864.3 0.005825 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Fit the hierarchical model
library(lme4)
## Warning: package 'lme4' was built under R version 3.5.2
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 3.5.2
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(arm)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is /Users/sdw227/Desktop/SPRING 2019/STATISTICS/homework/multi level models
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
1) Estimate the random intercept model with just the higher level units as your predictor (NULL model)
# 1) Estimate the random intercept model with just the higher level units as your predictor (NULL model)
fit<-glmer(activity~(1|state),data = brfss2017,na.action=na.omit)
## Warning in glmer(activity ~ (1 | state), data = brfss2017, na.action
## = na.omit): calling glmer() with family=gaussian (identity link) as a
## shortcut to lmer() is deprecated; please call lmer() directly
summary(fit)
## Linear mixed model fit by REML ['lmerMod']
## Formula: activity ~ (1 | state)
## Data: brfss2017
##
## REML criterion at convergence: 103317.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.1983 -0.7324 -0.3053 0.4073 3.7430
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 456.6 21.37
## Residual 96171.1 310.11
## Number of obs: 7218, groups: state, 28
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 343.360 5.625 61.04
arm::display(fit,detail=T)
## lme4::lmer(formula = activity ~ (1 | state), data = brfss2017,
## na.action = na.omit)
## coef.est coef.se t value
## (Intercept) 343.36 5.63 61.04
##
## Error terms:
## Groups Name Std.Dev.
## state (Intercept) 21.37
## Residual 310.11
## ---
## number of obs: 7218, groups: state, 28
## AIC = 103323, DIC = 103327.8
## deviance = 103322.6
##1a) Report the variance components and ICC.
# Within group variance is large. The between group variance seems reasonable.
library(sjstats)
## Warning: package 'sjstats' was built under R version 3.5.2
##
## Attaching package: 'sjstats'
## The following objects are masked from 'package:survey':
##
## cv, deff
re_var(fit)
## Within-group-variance: 96171.065
## Between-group-variance: 456.577 (state)
meanact<-mean(brfss2017$activity,na.rm=T)
sdact<-sd(brfss2017$activity,na.rm=T)
brfss2017$predact<-sdact*(fitted(fit))+meanact
citymeans<-aggregate(cbind(activity,predact)~state,brfss2017,mean)
head(citymeans,n=10)
## state activity predact
## 1 6 384.0847 114444.58
## 2 9 356.7681 109609.68
## 3 10 334.0000 105949.83
## 4 12 351.4855 108946.61
## 5 13 263.2823 97837.70
## 6 15 374.4886 112441.31
## 7 17 349.8344 107937.89
## 8 18 307.4089 99703.25
## 9 19 292.1128 98306.69
## 10 22 325.3643 104826.88
2) Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual level predictors
# 2) Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual level predictors
fit2<-lmer(activity~marital+sxorient+age+educ+(1|state),data = brfss2017,na.action=na.omit)
arm::display(fit2,detail=T)
## lmer(formula = activity ~ marital + sxorient + age + educ + (1 |
## state), data = brfss2017, na.action = na.omit)
## coef.est coef.se t value
## (Intercept) 203.67 31.77 6.41
## maritalb-Married 14.23 11.86 1.20
## maritalc-D/W/S 1.98 13.36 0.15
## maritalpartner 5.99 21.10 0.28
## sxorientb-Gay 75.03 28.51 2.63
## sxorientBisexual 14.64 28.26 0.52
## sxorientOther -79.46 36.18 -2.20
## age25-34 -38.42 19.01 -2.02
## age35-44 -27.08 19.98 -1.36
## age45-54 -2.45 19.30 -0.13
## age55-64 29.62 18.95 1.56
## age65+ 97.99 18.76 5.22
## educ1somehs 86.75 33.64 2.58
## educ2hsgrad 100.60 28.18 3.57
## educ3somecol 98.87 28.10 3.52
## educ4colgrad 103.77 27.81 3.73
##
## Error terms:
## Groups Name Std.Dev.
## state (Intercept) 22.69
## Residual 305.52
## ---
## number of obs: 7218, groups: state, 28
## AIC = 103016, DIC = 103208.8
## deviance = 103094.4
summary(fit2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: activity ~ marital + sxorient + age + educ + (1 | state)
## Data: brfss2017
##
## REML criterion at convergence: 102980
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.4570 -0.7049 -0.2855 0.4133 3.9204
##
## Random effects:
## Groups Name Variance Std.Dev.
## state (Intercept) 514.7 22.69
## Residual 93344.0 305.52
## Number of obs: 7218, groups: state, 28
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 203.668 31.770 5489.100 6.411 1.57e-10 ***
## maritalb-Married 14.231 11.862 7201.688 1.200 0.230298
## maritalc-D/W/S 1.978 13.355 7201.186 0.148 0.882246
## maritalpartner 5.991 21.103 7195.840 0.284 0.776504
## sxorientb-Gay 75.028 28.512 7196.873 2.631 0.008520 **
## sxorientBisexual 14.640 28.262 7197.537 0.518 0.604466
## sxorientOther -79.458 36.177 7200.913 -2.196 0.028098 *
## age25-34 -38.424 19.005 7193.960 -2.022 0.043241 *
## age35-44 -27.081 19.982 7187.900 -1.355 0.175375
## age45-54 -2.449 19.297 7195.211 -0.127 0.899011
## age55-64 29.616 18.950 7198.338 1.563 0.118123
## age65+ 97.995 18.756 7199.856 5.225 1.79e-07 ***
## educ1somehs 86.752 33.643 7196.672 2.579 0.009940 **
## educ2hsgrad 100.601 28.178 7201.683 3.570 0.000359 ***
## educ3somecol 98.866 28.100 7201.381 3.518 0.000437 ***
## educ4colgrad 103.772 27.806 7201.981 3.732 0.000191 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
# 2a Report the variance components of the model and describe the results.
# The within group variance is smaller then in the fit model, but it is still large. The between group variance has dropped.
re_var(fit2)
## Within-group-variance: 93344.004
## Between-group-variance: 514.691 (state)
3) Compare the models from 1 and 2 using a likelihood ratio test to see if our individual level variables explain anything about the outcome.
# 3) Compare the models from 1 and 2 using a likelihood ratio test to see if our individual level variables explain anything about the outcome.
anova(fit,fit2,test="Chisq")
## refitting model(s) with ML (instead of REML)
## Data: brfss2017
## Models:
## fit: activity ~ (1 | state)
## fit2: activity ~ marital + sxorient + age + educ + (1 | state)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit 3 103329 103349 -51661 103323
## fit2 18 103130 103254 -51547 103094 228.15 15 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Test to see if the random effect term is significant - The inclusion of the individual level variables is significant and improves the models overall fit.
rand(fit2)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## activity ~ marital + sxorient + age + educ + (1 | state)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 18 -51490 103016
## (1 | state) 17 -51497 103028 13.873 1 0.0001956 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1