The BRFSS dataset was used for this assignment. The outcome variable (wtkg3) is continuous and pertains to respondent’s reported weight in kilograms. The given structrual levels for this analysis are MSAs. The purpose of this assignment is to examine how 1). selected multi-level propositions affect respondent’s wtk3, and 2). how individual-level factors affect their respondent’s wtk3.
#load brfss
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(sjPlot)
library(ggplot2)
library(pander)
library(knitr)
library(tidycensus)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Variable names are changed in case and made neater.
library(haven)
MMSA2016 <- read_xpt("Desktop/Dem 7113/Week 4/MMSA2016.xpt")
set.seed(12345)
nams<-names(MMSA2016)
newnames<-gsub(pattern = "_", replacement = "", x = nams)
names(MMSA2016)<-tolower(newnames)
I use similar variables from the class example, but my outcome variable is weight in kilograms.
#sex
MMSA2016$male<-ifelse(MMSA2016$sex==1, 1, 0)
#BMI
MMSA2016$wtkg3<-ifelse(is.na(MMSA2016$wtkg3)==T, NA, MMSA2016$wtkg3/100)
#race/ethnicity
MMSA2016$black<-Recode(MMSA2016$racegr3, recodes="2=1; 9=NA; else=0")
MMSA2016$white<-Recode(MMSA2016$racegr3, recodes="1=1; 9=NA; else=0")
MMSA2016$other<-Recode(MMSA2016$racegr3, recodes="3:4=1; 9=NA; else=0")
MMSA2016$hispanic<-Recode(MMSA2016$racegr3, recodes="5=1; 9=NA; else=0")
MMSA2016$race_eth<-Recode(MMSA2016$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; e
lse=NA",
as.factor = T)
MMSA2016$race_eth<-relevel(MMSA2016$race_eth, ref = "nhwhite")
#income grouping
MMSA2016$inc<-Recode(MMSA2016$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor = T)
MMSA2016$inc<-as.ordered(MMSA2016$inc)
#education level
MMSA2016$educ<-Recode(MMSA2016$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
MMSA2016$educ<-relevel(MMSA2016$educ, ref='2hsgrad')
#marital status
MMSA2016$marst<-Recode(MMSA2016$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab';
else=NA",
as.factor=T)
MMSA2016$marst<-relevel(MMSA2016$marst, ref='married')
#Age cut into intervals
MMSA2016$agec<-cut(MMSA2016$age80, breaks=c(0,24,39,59,79,99))
MMSA2016$wtkg3<-MMSA2016$wtkg3/100
The following series of code involve querying ACS data pertaining to MSAs, in preparation to merge with BRFSS dataset.
head(data.frame(name=table(MMSA2016$mmsaname),id=unique(MMSA2016$mmsa)))
## name.Var1
## 1 Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2 Albuquerque, NM, Metropolitan Statistical Area
## 3 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4 Anchorage, AK, Metropolitan Statistical Area
## 5 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6 Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
## name.Freq id
## 1 2901 10580
## 2 1300 10740
## 3 675 10900
## 4 1028 11260
## 5 2444 12060
## 6 849 12260
length(table(MMSA2016$mmsa))
## [1] 143
census_api_key("f3d69cc5bbda135c4852cf616317f7eca5407001")
## To install your API key for use in future sessions, run this function with `install = TRUE`.
usacs<-get_acs(geography="metropolitan statistical area/micropolitan statistical area", year = 2011, variables=c( "DP05_0001E","DP03_0009P","DP03_0062E","DP03_0119PE","DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092P","DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099P","DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE","DP02_0038E","DP03_0101PE","DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE","DP02_0113PE", "DP04_0003PE"), summary_var = "B01001_001", geometry = F, output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
## Using the ACS Data Profile
usacs<-usacs%>%
mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100,nwhite=DP05_0032E,nblack=DP05_0033E,pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100,phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,ppov=DP03_0119PE/100,pforn=DP02_0092PE/100,plep=DP02_0113PE/100,pvacant=DP04_0003PE/100)%>%
select(GEOID,NAME, totpop, pwhite, pblack, phisp, pfemhh, phsormore, punemp,medhhinc, ppov, pforn,plep, pvacant)
head(usacs)
## # A tibble: 6 x 14
## GEOID NAME totpop pwhite pblack phisp pfemhh phsormore punemp medhhinc
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10020 Abbe… 57660 0.801 0.138 0.024 0.13 0.756 0.059 43349
## 2 10100 Aber… 40315 0.933 0.005 0.0140 0.066 0.907 0.025 47395
## 3 10140 Aber… 72348 0.818 0.01 0.084 0.103 0.844 0.135 42729
## 4 10180 Abil… 164100 0.681 0.072 0.21 0.131 0.831 0.055 42644
## 5 10220 Ada,… 37102 0.698 0.025 0.0410 0.127 0.853 0.05 40641
## 6 10300 Adri… 100456 0.877 0.024 0.075 0.107 0.884 0.113 48595
## # … with 4 more variables: ppov <dbl>, pforn <dbl>, plep <dbl>,
## # pvacant <dbl>
myscale<-function(x){as.numeric(scale(x))}
usacs<-usacs %>%
mutate_at(c("ppov","punemp", "pblack","phisp", "pfemhh", "phsormore", "medhhinc", "pvacant"),myscale)
merged<-usacs%>%filter(complete.cases(.))
head(merged)
## # A tibble: 6 x 14
## GEOID NAME totpop pwhite pblack phisp pfemhh phsormore punemp medhhinc
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 10020 Abbe… 57660 0.801 0.340 -0.515 0.267 -1.33 -0.928 -0.202
## 2 10100 Aber… 40315 0.933 -0.673 -0.570 -1.51 0.984 -1.99 0.210
## 3 10140 Aber… 72348 0.818 -0.635 -0.184 -0.483 0.0184 1.44 -0.266
## 4 10180 Abil… 164100 0.681 -0.163 0.512 0.295 -0.181 -1.05 -0.274
## 5 10220 Ada,… 37102 0.698 -0.520 -0.421 0.183 0.156 -1.21 -0.479
## 6 10300 Adri… 100456 0.877 -0.528 -0.233 -0.372 0.631 0.757 0.333
## # … with 4 more variables: ppov <dbl>, pforn <dbl>, plep <dbl>,
## # pvacant <dbl>
merged<-merge(x=MMSA2016, y=merged, by.x="mmsa", by.y="GEOID", all.x=F)
merged$wtkg3z<-as.numeric(scale(merged$wtkg3, center=T, scale=T))
merged<-merged%>%
select(wtkg3z,wtkg3, mmsa, agec, educ, race_eth,pblack,phisp,pfemhh,phsormore,medhhinc,ppov, pforn,plep,pvacant, male, mmsawt,mmsaname )%>%
filter(complete.cases(.))
head(merged[, c("wtkg3z", "male", "agec", "educ", "ppov","medhhinc", "pvacant", "mmsa")])
## wtkg3z male agec educ ppov medhhinc pvacant mmsa
## 1 -0.79553414 0 (39,59] 1somehs -0.8415807 1.428289 -0.3424294 10580
## 2 0.38247338 0 (59,79] 2hsgrad -0.8415807 1.428289 -0.3424294 10580
## 3 -0.19525993 0 (79,99] 3somecol -0.8415807 1.428289 -0.3424294 10580
## 4 -0.52847337 0 (79,99] 4colgrad -0.8415807 1.428289 -0.3424294 10580
## 5 -0.97341132 0 (79,99] 3somecol -0.8415807 1.428289 -0.3424294 10580
## 6 0.02720905 1 (39,59] 4colgrad -0.8415807 1.428289 -0.3424294 10580
meanwtkg<-mean(merged$wtkg3, na.rm=T)
sdwtkg<-sd(merged$wtkg3, na.rm=T)
fit.an<-lm(wtkg3z~as.factor(mmsa), merged)
anova(fit.an)
## Analysis of Variance Table
##
## Response: wtkg3z
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(mmsa) 121 1172 9.6874 9.7443 < 2.2e-16 ***
## Residuals 176936 175903 0.9942
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Our ANOVA test indicates significance among the higher level predictors. Variation exists, which encourages us to continue with performing a multi-modeling procedure.
library(lme4)
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(arm)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is /Users/loweticjustice
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
library(sjstats)
##
## Attaching package: 'sjstats'
## The following objects are masked from 'package:survey':
##
## cv, deff
library(sjPlot)
fit<-glmer(wtkg3z~(1|mmsa), data=merged)
## Warning in glmer(wtkg3z ~ (1 | mmsa), data = merged): calling glmer() with
## family=gaussian (identity link) as a shortcut to lmer() is deprecated;
## please call lmer() directly
arm::display(fit, detail=T)
## lme4::lmer(formula = wtkg3z ~ (1 | mmsa), data = merged)
## coef.est coef.se t value
## (Intercept) 0.02 0.01 2.12
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.08
## Residual 1.00
## ---
## number of obs: 177058, groups: mmsa, 122
## AIC = 501705, DIC = 501682.9
## deviance = 501690.8
merged$predwtkg<-sdwtkg*(fitted(fit))+meanwtkg
head(ranef(fit)$mmsa)
## (Intercept)
## 10580 -0.02488637
## 10740 -0.15490938
## 10900 -0.01776305
## 11260 0.09125945
## 12060 0.00210287
## 12260 -0.04268123
dim(ranef(fit)$mmsa)
## [1] 122 1
icc(fit)
##
## Intraclass Correlation Coefficient for Linear mixed model
##
## Family : gaussian (identity)
## Formula: wtkg3z ~ (1 | mmsa)
##
## ICC (mmsa): 0.0062
citymeans<-aggregate(cbind(wtkg3, predwtkg)~mmsaname,merged, mean)
head(citymeans, n=10)
## mmsaname
## 1 Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2 Albuquerque, NM, Metropolitan Statistical Area
## 3 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4 Anchorage, AK, Metropolitan Statistical Area
## 5 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6 Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
## 7 Austin-Round Rock, TX, Metropolitan Statistical Area
## 8 Baltimore-Columbia-Towson, MD, Metropolitan Statistical Area
## 9 Baton Rouge, LA, Metropolitan Statistical Area
## 10 Beckley, WV, Metropolitan Statistical Area
## wtkg3 predwtkg
## 1 0.8089004 0.8091856
## 2 0.7783352 0.7826500
## 3 0.8097252 0.8106394
## 4 0.8360406 0.8328891
## 5 0.8147465 0.8146937
## 6 0.8038068 0.8055540
## 7 0.7868824 0.7895598
## 8 0.8155866 0.8155319
## 9 0.8162920 0.8158366
## 10 0.8505861 0.8418721
The ICC is significantly low, and indicates a low degree of similarity between the higher level units. It accounts for much less than 1 percent of variation across MSA’s. Unsurprisingly, this is not too dissimilar from the bmi example.
Now, we fit the multilevel model with the individual level predictors.
fit2<-lmer(wtkg3z~male+agec+educ+(1|mmsa), data=merged, na.action=na.omit)
arm::display(fit2, detail=T)
## lmer(formula = wtkg3z ~ male + agec + educ + (1 | mmsa), data = merged,
## na.action = na.omit)
## coef.est coef.se t value
## (Intercept) -0.69 0.01 -57.64
## male 0.80 0.00 184.38
## agec(24,39] 0.41 0.01 39.74
## agec(39,59] 0.54 0.01 55.57
## agec(59,79] 0.41 0.01 43.22
## agec(79,99] 0.00 0.01 -0.11
## educ0Prim -0.13 0.02 -8.15
## educ1somehs -0.04 0.01 -3.58
## educ3somecol 0.03 0.01 5.04
## educ4colgrad -0.11 0.01 -19.86
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.08
## Residual 0.90
## ---
## number of obs: 177058, groups: mmsa, 122
## AIC = 464802, DIC = 464616.2
## deviance = 464697.0
anova(fit, fit2)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit: wtkg3z ~ (1 | mmsa)
## fit2: wtkg3z ~ male + agec + educ + (1 | mmsa)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit 3 501697 501727 -250845 501691
## fit2 12 464721 464842 -232348 464697 36994 9 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
As expected, our individual level predictors translates into a signficant model, meaning the likelihood ratio adequately accounts for explanation of the weight outcome.
rand(fit2)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## wtkg3z ~ male + agec + educ + (1 | mmsa)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 12 -232389 464802
## (1 | mmsa) 11 -232900 465822 1022.1 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(sjstats); library(sjPlot)
icc(fit2)
##
## Intraclass Correlation Coefficient for Linear mixed model
##
## Family : gaussian (identity)
## Formula: wtkg3z ~ male + agec + educ + (1 | mmsa)
##
## ICC (mmsa): 0.0074
plot_model(fit, type = "re", sort.est="sort.all", grid=F)
The ICC accounts for more explanation in this model, but still is significantly low. Variations were signficant across MSAs in reference to average weights.