library(haven)
library(SASxport)
## Warning: package 'SASxport' was built under R version 4.0.5
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(ggplot2)
library(pander)
library(knitr)
library(tidycensus)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#Use the BRFSS for this assignment, UNLESS your data set has a higher level variable like school, county, metro area, etc. You can contact me if you are uncertain.Use a continuous outcome Conduct a multi-level analysis where you:
#avedrnk2 is my continuous outcome, or average alcoholic drinks consumed in the last 30 days.
brfss_17<-read_xpt("C:/Users/josha/Desktop/Fall 2020 Stats 1 Dem/WorkingDirectoryFall2020StatsDem1/MMSA2017.XPT")
nams<-names(brfss_17)
newnames<-gsub(pattern = "_",
replacement = "",
x = nams)
names(brfss_17)<-tolower(newnames)
#sex
brfss_17$male<-ifelse(brfss_17$sex==1, 1, 0)
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
brfss_17$race_eth<-Recode(brfss_17$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")
#employment
brfss_17$employ<-Recode(brfss_17$employ1,
recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='employloyed')
#marital status
brfss_17$marst<-Recode(brfss_17$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst,
ref='married')
#education level
brfss_17$educ<-Recode(brfss_17$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')
head(data.frame(name=table(brfss_17$mmsaname),
id=unique(brfss_17$mmsa)))
## name.Var1 name.Freq
## 1 Aberdeen, SD, Micropolitan Statistical Area 512
## 2 Albany-Schenectady-Troy, NY, Metropolitan Statistical Area 632
## 3 Albuquerque, NM, Metropolitan Statistical Area 1654
## 4 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area 866
## 5 Anchorage, AK, Metropolitan Statistical Area 1029
## 6 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area 2944
## id
## 1 10100
## 2 10580
## 3 10740
## 4 10900
## 5 11260
## 6 12060
#Estimate the random intercept model with just the higher level units as your predictor (NULL model)report the variance components and ICC
merged<-brfss_17%>%
dplyr::select(avedrnk2, marst, mmsa, educ, race_eth, menthlth, male, employ, mmsawt)%>%
filter(complete.cases(.))
fit.an<-lm(avedrnk2~as.factor(mmsa),
merged)
anova(fit.an)
## Analysis of Variance Table
##
## Response: avedrnk2
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(mmsa) 135 79895 591.81 5.7771 < 2.2e-16 ***
## Residuals 116016 11884884 102.44
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lme4)
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(arm)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## arm (Version 1.11-2, built: 2020-7-27)
## Working directory is C:/Users/josha/Desktop/Fall 2020 Stats 1 Dem/WorkingDirectoryFall2020StatsDem1
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
fit<-lmer(avedrnk2~(1|mmsa),
data=merged)
arm::display(fit, detail=T)
## lmer(formula = avedrnk2 ~ (1 | mmsa), data = merged)
## coef.est coef.se t value
## (Intercept) 3.52 0.08 44.47
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.81
## Residual 10.12
## ---
## number of obs: 116152, groups: mmsa, 136
## AIC = 867552, DIC = 867539.2
## deviance = 867542.5
(.81^2)/(10.12^2+.81^2)
## [1] 0.006365546
#Now fit the multilevel model for your outcome and include both the higher level term and appropriate individual - level predictors.#Report the variance components of the model and describe the results of the model Report the results for the fixed effects
fit2<-lmer(avedrnk2~male+race_eth+employ+marst+menthlth+educ+(1|mmsa),
data=merged)
arm::display(fit2, detail=T)
## lmer(formula = avedrnk2 ~ male + race_eth + employ + marst +
## menthlth + educ + (1 | mmsa), data = merged)
## coef.est coef.se t value
## (Intercept) 3.40 0.12 29.12
## male 1.00 0.06 16.44
## race_ethhispanic 1.77 0.12 14.52
## race_ethnh black 0.92 0.12 8.00
## race_ethnh multirace 0.56 0.23 2.45
## race_ethnh other 0.89 0.17 5.35
## employnilf 0.23 0.10 2.29
## employretired -0.33 0.07 -4.44
## employunable 0.98 0.17 5.67
## marstcohab 0.63 0.15 4.10
## marstdivorced 0.56 0.09 6.04
## marstnm 1.21 0.08 14.45
## marstseparated 0.55 0.23 2.41
## marstwidowed 0.40 0.11 3.45
## menthlth 0.00 0.00 2.35
## educ0Prim 2.22 0.31 7.08
## educ1somehs 2.34 0.20 11.63
## educ3somecol -1.05 0.09 -11.75
## educ4colgrad -1.79 0.08 -22.00
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.62
## Residual 10.02
## ---
## number of obs: 116152, groups: mmsa, 136
## AIC = 865150, DIC = 864995
## deviance = 865051.2
(.62^2)/(10.02^2+.62^2)
## [1] 0.003814067
##Compare the models from 1 and 2 using a likelihood ratio test (anova(fit1, fit2, test=”Chisq”)) .
anova(fit, fit2)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit: avedrnk2 ~ (1 | mmsa)
## fit2: avedrnk2 ~ male + race_eth + employ + marst + menthlth + educ +
## fit2: (1 | mmsa)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## fit 3 867548 867577 -433771 867542
## fit2 21 865093 865296 -432526 865051 2491.2 18 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
rand(fit2)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## avedrnk2 ~ male + race_eth + employ + marst + menthlth + educ + (1 | mmsa)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 21 -432554 865149
## (1 | mmsa) 20 -432661 865361 213.86 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Does the inclusion of the individual level variables increase the overall model fit? An anova was run to compare the two models, with fit1 just being city level indicators, and fit2 being the more complicated model. The aic of fit is lower by about 2000, with the chisq coefficient being pretty high at 2491.2 meaning there is a small chance this at random. The second more comprehensive model fits the better than the previous one. Additionally to test this, a random effect test was run, indicating that not all cities will have the same on average consumption of alcohol during the month.