R.H.Lowe Jr. | UTSA | DEM 7283

Research Purpose

The BRFSS dataset was used for this assignment. The outcome variable (wtkg3) is continuous and pertains to respondent’s reported weight in kilograms. The given structrual levels for this analysis are MSAs. The purpose of this assignment is to examine how 1). selected multi-level propositions affect respondent’s wtk3, and 2). how individual-level factors affect their respondent’s wtk3.

#load brfss
library(car) 
## Loading required package: carData
library(stargazer) 
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey) 
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(sjPlot) 
library(ggplot2) 
library(pander) 
library(knitr) 
library(tidycensus) 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Variable names are changed in case and made neater.

library(haven)
MMSA2016 <- read_xpt("Desktop/Dem 7113/Week 4/MMSA2016.xpt")

set.seed(12345)
nams<-names(MMSA2016)
newnames<-gsub(pattern = "_", replacement = "", x = nams)
names(MMSA2016)<-tolower(newnames)

I use similar variables from the class example, but my outcome variable is weight in kilograms.

#sex
MMSA2016$male<-ifelse(MMSA2016$sex==1, 1, 0)
#BMI
MMSA2016$wtkg3<-ifelse(is.na(MMSA2016$wtkg3)==T, NA, MMSA2016$wtkg3/100)
#race/ethnicity
MMSA2016$black<-Recode(MMSA2016$racegr3, recodes="2=1; 9=NA; else=0")
MMSA2016$white<-Recode(MMSA2016$racegr3, recodes="1=1; 9=NA; else=0")
MMSA2016$other<-Recode(MMSA2016$racegr3, recodes="3:4=1; 9=NA; else=0")
MMSA2016$hispanic<-Recode(MMSA2016$racegr3, recodes="5=1; 9=NA; else=0")
MMSA2016$race_eth<-Recode(MMSA2016$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; e
lse=NA",
as.factor = T)
MMSA2016$race_eth<-relevel(MMSA2016$race_eth, ref = "nhwhite")
#income grouping
MMSA2016$inc<-Recode(MMSA2016$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor = T)
MMSA2016$inc<-as.ordered(MMSA2016$inc)
#education level
MMSA2016$educ<-Recode(MMSA2016$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
MMSA2016$educ<-relevel(MMSA2016$educ, ref='2hsgrad')
#marital status
MMSA2016$marst<-Recode(MMSA2016$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab';
else=NA",
as.factor=T)
MMSA2016$marst<-relevel(MMSA2016$marst, ref='married')
#Age cut into intervals
MMSA2016$agec<-cut(MMSA2016$age80, breaks=c(0,24,39,59,79,99))
MMSA2016$wtkg3<-MMSA2016$wtkg3/100

Higher level predictors

The following series of code involve querying ACS data pertaining to MSAs, in preparation to merge with BRFSS dataset.

head(data.frame(name=table(MMSA2016$mmsaname),id=unique(MMSA2016$mmsa)))
##                                                          name.Var1
## 1       Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2                   Albuquerque, NM, Metropolitan Statistical Area
## 3 Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4                     Anchorage, AK, Metropolitan Statistical Area
## 5 Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6    Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
##   name.Freq    id
## 1      2901 10580
## 2      1300 10740
## 3       675 10900
## 4      1028 11260
## 5      2444 12060
## 6       849 12260
length(table(MMSA2016$mmsa))
## [1] 143
census_api_key("f3d69cc5bbda135c4852cf616317f7eca5407001")
## To install your API key for use in future sessions, run this function with `install = TRUE`.
usacs<-get_acs(geography="metropolitan statistical area/micropolitan statistical area", year = 2011, variables=c( "DP05_0001E","DP03_0009P","DP03_0062E","DP03_0119PE","DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092P","DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099P","DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE","DP02_0038E","DP03_0101PE","DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE","DP02_0113PE",  "DP04_0003PE"), summary_var = "B01001_001", geometry = F, output = "wide")       
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
## Using the ACS Data Profile
usacs<-usacs%>%
  mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100,nwhite=DP05_0032E,nblack=DP05_0033E,pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100,phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,ppov=DP03_0119PE/100,pforn=DP02_0092PE/100,plep=DP02_0113PE/100,pvacant=DP04_0003PE/100)%>%
  select(GEOID,NAME, totpop, pwhite, pblack, phisp, pfemhh, phsormore, punemp,medhhinc, ppov, pforn,plep, pvacant)

head(usacs)
## # A tibble: 6 x 14
##   GEOID NAME  totpop pwhite pblack  phisp pfemhh phsormore punemp medhhinc
##   <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>     <dbl>  <dbl>    <dbl>
## 1 10020 Abbe…  57660  0.801  0.138 0.024   0.13      0.756  0.059    43349
## 2 10100 Aber…  40315  0.933  0.005 0.0140  0.066     0.907  0.025    47395
## 3 10140 Aber…  72348  0.818  0.01  0.084   0.103     0.844  0.135    42729
## 4 10180 Abil… 164100  0.681  0.072 0.21    0.131     0.831  0.055    42644
## 5 10220 Ada,…  37102  0.698  0.025 0.0410  0.127     0.853  0.05     40641
## 6 10300 Adri… 100456  0.877  0.024 0.075   0.107     0.884  0.113    48595
## # … with 4 more variables: ppov <dbl>, pforn <dbl>, plep <dbl>,
## #   pvacant <dbl>
myscale<-function(x){as.numeric(scale(x))} 
usacs<-usacs %>%
  mutate_at(c("ppov","punemp", "pblack","phisp", "pfemhh", "phsormore", "medhhinc", "pvacant"),myscale)
merged<-usacs%>%filter(complete.cases(.))
head(merged)
## # A tibble: 6 x 14
##   GEOID NAME  totpop pwhite pblack  phisp pfemhh phsormore punemp medhhinc
##   <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>     <dbl>  <dbl>    <dbl>
## 1 10020 Abbe…  57660  0.801  0.340 -0.515  0.267   -1.33   -0.928   -0.202
## 2 10100 Aber…  40315  0.933 -0.673 -0.570 -1.51     0.984  -1.99     0.210
## 3 10140 Aber…  72348  0.818 -0.635 -0.184 -0.483    0.0184  1.44    -0.266
## 4 10180 Abil… 164100  0.681 -0.163  0.512  0.295   -0.181  -1.05    -0.274
## 5 10220 Ada,…  37102  0.698 -0.520 -0.421  0.183    0.156  -1.21    -0.479
## 6 10300 Adri… 100456  0.877 -0.528 -0.233 -0.372    0.631   0.757    0.333
## # … with 4 more variables: ppov <dbl>, pforn <dbl>, plep <dbl>,
## #   pvacant <dbl>
 merged<-merge(x=MMSA2016, y=merged, by.x="mmsa", by.y="GEOID", all.x=F)
merged$wtkg3z<-as.numeric(scale(merged$wtkg3, center=T, scale=T))
merged<-merged%>%
  select(wtkg3z,wtkg3, mmsa, agec, educ, race_eth,pblack,phisp,pfemhh,phsormore,medhhinc,ppov, pforn,plep,pvacant, male, mmsawt,mmsaname )%>%
  filter(complete.cases(.))
head(merged[, c("wtkg3z", "male", "agec", "educ", "ppov","medhhinc", "pvacant", "mmsa")])
##        wtkg3z male    agec     educ       ppov medhhinc    pvacant  mmsa
## 1 -0.79553414    0 (39,59]  1somehs -0.8415807 1.428289 -0.3424294 10580
## 2  0.38247338    0 (59,79]  2hsgrad -0.8415807 1.428289 -0.3424294 10580
## 3 -0.19525993    0 (79,99] 3somecol -0.8415807 1.428289 -0.3424294 10580
## 4 -0.52847337    0 (79,99] 4colgrad -0.8415807 1.428289 -0.3424294 10580
## 5 -0.97341132    0 (79,99] 3somecol -0.8415807 1.428289 -0.3424294 10580
## 6  0.02720905    1 (39,59] 4colgrad -0.8415807 1.428289 -0.3424294 10580
meanwtkg<-mean(merged$wtkg3, na.rm=T)
sdwtkg<-sd(merged$wtkg3, na.rm=T)

fit.an<-lm(wtkg3z~as.factor(mmsa), merged)
anova(fit.an)
## Analysis of Variance Table
## 
## Response: wtkg3z
##                     Df Sum Sq Mean Sq F value    Pr(>F)    
## as.factor(mmsa)    121   1172  9.6874  9.7443 < 2.2e-16 ***
## Residuals       176936 175903  0.9942                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Our ANOVA test indicates significance among the higher level predictors. Variation exists, which encourages us to continue with performing a multi-modeling procedure.

library(lme4) 
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
library(arm)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is /Users/loweticjustice
## 
## Attaching package: 'arm'
## The following object is masked from 'package:car':
## 
##     logit
library(sjstats) 
## 
## Attaching package: 'sjstats'
## The following objects are masked from 'package:survey':
## 
##     cv, deff
library(sjPlot)
fit<-glmer(wtkg3z~(1|mmsa), data=merged)
## Warning in glmer(wtkg3z ~ (1 | mmsa), data = merged): calling glmer() with
## family=gaussian (identity link) as a shortcut to lmer() is deprecated;
## please call lmer() directly
arm::display(fit, detail=T)
## lme4::lmer(formula = wtkg3z ~ (1 | mmsa), data = merged)
##             coef.est coef.se t value
## (Intercept) 0.02     0.01    2.12   
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.08    
##  Residual             1.00    
## ---
## number of obs: 177058, groups: mmsa, 122
## AIC = 501705, DIC = 501682.9
## deviance = 501690.8
merged$predwtkg<-sdwtkg*(fitted(fit))+meanwtkg
head(ranef(fit)$mmsa)
##       (Intercept)
## 10580 -0.02488637
## 10740 -0.15490938
## 10900 -0.01776305
## 11260  0.09125945
## 12060  0.00210287
## 12260 -0.04268123
dim(ranef(fit)$mmsa)
## [1] 122   1
icc(fit)
## 
## Intraclass Correlation Coefficient for Linear mixed model
## 
## Family : gaussian (identity)
## Formula: wtkg3z ~ (1 | mmsa)
## 
##   ICC (mmsa): 0.0062
citymeans<-aggregate(cbind(wtkg3, predwtkg)~mmsaname,merged, mean)
head(citymeans, n=10)
##                                                            mmsaname
## 1        Albany-Schenectady-Troy, NY, Metropolitan Statistical Area
## 2                    Albuquerque, NM, Metropolitan Statistical Area
## 3  Allentown-Bethlehem-Easton, PA-NJ, Metropolitan Statistical Area
## 4                      Anchorage, AK, Metropolitan Statistical Area
## 5  Atlanta-Sandy Springs-Roswell, GA, Metropolitan Statistical Area
## 6     Augusta-Richmond County, GA-SC, Metropolitan Statistical Area
## 7              Austin-Round Rock, TX, Metropolitan Statistical Area
## 8      Baltimore-Columbia-Towson, MD, Metropolitan Statistical Area
## 9                    Baton Rouge, LA, Metropolitan Statistical Area
## 10                       Beckley, WV, Metropolitan Statistical Area
##        wtkg3  predwtkg
## 1  0.8089004 0.8091856
## 2  0.7783352 0.7826500
## 3  0.8097252 0.8106394
## 4  0.8360406 0.8328891
## 5  0.8147465 0.8146937
## 6  0.8038068 0.8055540
## 7  0.7868824 0.7895598
## 8  0.8155866 0.8155319
## 9  0.8162920 0.8158366
## 10 0.8505861 0.8418721

The ICC is significantly low, and indicates a low degree of similarity between the higher level units. It accounts for much less than 1 percent of variation across MSA’s. Unsurprisingly, this is not too dissimilar from the bmi example.

Now, we fit the multilevel model with the individual level predictors.

Individual level predictors

fit2<-lmer(wtkg3z~male+agec+educ+(1|mmsa), data=merged,  na.action=na.omit)
arm::display(fit2, detail=T)
## lmer(formula = wtkg3z ~ male + agec + educ + (1 | mmsa), data = merged, 
##     na.action = na.omit)
##              coef.est coef.se t value
## (Intercept)   -0.69     0.01  -57.64 
## male           0.80     0.00  184.38 
## agec(24,39]    0.41     0.01   39.74 
## agec(39,59]    0.54     0.01   55.57 
## agec(59,79]    0.41     0.01   43.22 
## agec(79,99]    0.00     0.01   -0.11 
## educ0Prim     -0.13     0.02   -8.15 
## educ1somehs   -0.04     0.01   -3.58 
## educ3somecol   0.03     0.01    5.04 
## educ4colgrad  -0.11     0.01  -19.86 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  mmsa     (Intercept) 0.08    
##  Residual             0.90    
## ---
## number of obs: 177058, groups: mmsa, 122
## AIC = 464802, DIC = 464616.2
## deviance = 464697.0
anova(fit, fit2)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit: wtkg3z ~ (1 | mmsa)
## fit2: wtkg3z ~ male + agec + educ + (1 | mmsa)
##      Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)    
## fit   3 501697 501727 -250845   501691                            
## fit2 12 464721 464842 -232348   464697 36994      9  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

As expected, our individual level predictors translates into a signficant model, meaning the likelihood ratio adequately accounts for explanation of the weight outcome.

rand(fit2)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## wtkg3z ~ male + agec + educ + (1 | mmsa)
##            npar  logLik    AIC    LRT Df Pr(>Chisq)    
## <none>       12 -232389 464802                         
## (1 | mmsa)   11 -232900 465822 1022.1  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(sjstats); library(sjPlot)
icc(fit2)
## 
## Intraclass Correlation Coefficient for Linear mixed model
## 
## Family : gaussian (identity)
## Formula: wtkg3z ~ male + agec + educ + (1 | mmsa)
## 
##   ICC (mmsa): 0.0074
plot_model(fit, type = "re", sort.est="sort.all", grid=F)

The ICC accounts for more explanation in this model, but still is significantly low. Variations were signficant across MSAs in reference to average weights.