Load packages and BRFSS Data
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(sjPlot)
## Warning: package 'sjPlot' was built under R version 4.0.4
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
library(ggplot2)
library(pander)
library(knitr)
library(tidycensus)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
set.seed(12345)
#samps<-sample(1:nrow(brfss_17), size = 40000, replace=F)
#brfss_17<-brfss_17[samps,]
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss_17)
#we see some names are lower case, some are upper and some have a little _ in the first position. This is a nightmare.
newnames<-gsub(pattern = "_",
replacement = "",
x = nams)
names(brfss_17)<-tolower(newnames)
#sex
brfss_17$male<-ifelse(brfss_17$sex==1, 1, 0)
#BMI
brfss_17$bmi<-ifelse(is.na(brfss_17$bmi5)==T, NA, brfss_17$bmi5/100)
#Healthy days
brfss_17$healthdays<-Recode(brfss_17$physhlth, recodes = "88=0; 77=NA; 99=NA")
#Healthy mental health days
brfss_17$healthmdays<-Recode(brfss_17$menthlth, recodes = "88=0; 77=NA; 99=NA")
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
brfss_17$race_eth<-Recode(brfss_17$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")
#insurance
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#income grouping
brfss_17$inc<-Recode(brfss_17$incomg, recodes = "9= NA;1='1_lt15k'; 2='2_15-25k';3='3_25-35k';4='4_35-50k';5='5_50kplus'", as.factor = T)
brfss_17$inc<-as.ordered(brfss_17$inc)
#education level
brfss_17$educ<-Recode(brfss_17$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')
#employment
brfss_17$employ<-Recode(brfss_17$employ1,
recodes="1:2='employloyed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='employloyed')
#marital status
brfss_17$marst<-Recode(brfss_17$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst,
ref='married')
#Age cut into intervals
brfss_17$agec<-cut(brfss_17$age80,
breaks=c(0,24,39,59,79,99))
#BMI, in the brfss_17a the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's
brfss_17$bmi<-brfss_17$bmi5/100
#smoking currently
brfss_17$smoke<-Recode(brfss_17$smoker3,
recodes="1:2=1; 3:4=0; else=NA")
#brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")
brfss_17$obese<-ifelse(is.na(brfss_17$bmi)==T, NA,
ifelse(brfss_17$bmi>30,1,0))
usacs<-get_acs(geography = "metropolitan statistical area/micropolitan statistical area", year = 2011,
variables=c( "DP05_0001E",
"DP03_0062E",
"DP04_0003PE") ,
summary_var = "B01001_001",
geometry = F,
output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
usacs<-usacs%>%
mutate(totpop= DP05_0001E,
medhhinc=DP03_0062E,
pvacant=DP04_0003PE/100)%>%
dplyr::select(GEOID,NAME, totpop, medhhinc, pvacant)
head(usacs)
## # A tibble: 6 x 5
## GEOID NAME totpop medhhinc pvacant
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 15100 Brookings, SD Micro Area 31639 46101 0.107
## 2 15140 Brownsville, TN Micro Area 18847 33504 0.135
## 3 15180 Brownsville-Harlingen, TX Metro Area 400332 32156 0.188
## 4 15220 Brownwood, TX Micro Area 38136 39965 0.264
## 5 15260 Brunswick, GA Metro Area 110738 46199 0.244
## 6 15340 Bucyrus, OH Micro Area 43998 41336 0.113
Clean and standardize contextual data
myscale<-function(x){as.numeric(scale(x))}
usacs<-usacs %>%
mutate_at(c( "medhhinc", "pvacant"),myscale)
merged<-usacs%>%
filter(complete.cases(.))
head(merged)
## # A tibble: 6 x 5
## GEOID NAME totpop medhhinc pvacant
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 15100 Brookings, SD Micro Area 31639 0.0782 -0.485
## 2 15140 Brownsville, TN Micro Area 18847 -1.21 -0.0850
## 3 15180 Brownsville-Harlingen, TX Metro Area 400332 -1.34 0.673
## 4 15220 Brownwood, TX Micro Area 38136 -0.548 1.76
## 5 15260 Brunswick, GA Metro Area 110738 0.0882 1.47
## 6 15340 Bucyrus, OH Micro Area 43998 -0.408 -0.400
merged<-merge(x=brfss_17,
y=merged,
by.x="mmsa",
by.y="GEOID",
all.x=F)
Merge MSA data to BRFSS data
merged$bmiz<-as.numeric(scale(merged$bmi,
center=T,
scale=T))
#merged<-merged[complete.cases(merged[, c("bmiz", "race_eth", "agec", "educ", "gini")]),]
#and merge the data back to the kids data
merged<-merged%>%
dplyr::select(bmiz, obese, mmsa, agec, educ, race_eth,smoke, healthmdays, badhealth,bmi,medhhinc,pvacant, male, mmsawt, mmsaname )%>%
filter(complete.cases(.))
head(merged[, c("bmiz", "male", "agec", "educ","medhhinc", "pvacant", "mmsa")])
## bmiz male agec educ medhhinc pvacant mmsa
## 1 -0.7061616 1 (39,59] 3somecol 0.2101566 -0.6427496 10100
## 2 -0.5879484 1 (24,39] 3somecol 0.2101566 -0.6427496 10100
## 3 -1.2732656 0 (59,79] 3somecol 0.2101566 -0.6427496 10100
## 4 -0.8770916 1 (59,79] 4colgrad 0.2101566 -0.6427496 10100
## 5 -0.5000872 0 (59,79] 2hsgrad 0.2101566 -0.6427496 10100
## 6 -0.5639863 0 (39,59] 2hsgrad 0.2101566 -0.6427496 10100
Initial test for significant variation b/w groups using ANOVA
fit.ob<-glm(badhealth~as.factor(mmsa),
family=binomial,
merged)
anova(fit.ob, test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: badhealth
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 159428 149062
## as.factor(mmsa) 117 2104.3 159311 146958 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# There is significant variation
Estimate with random intercept model
library(lme4)
library(lmerTest)
## Warning: package 'lmerTest' was built under R version 4.0.5
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(arm)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## arm (Version 1.11-2, built: 2020-7-27)
## Working directory is C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2
##
## Attaching package: 'arm'
## The following object is masked from 'package:car':
##
## logit
fit<-glmer(badhealth~(1|mmsa),
family=binomial, merged)
arm::display(fit,
detail=T)
## glmer(formula = badhealth ~ (1 | mmsa), data = merged, family = binomial)
## coef.est coef.se z value Pr(>|z|)
## (Intercept) -1.50 0.03 -56.61 0.00
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.28
## Residual 1.00
## ---
## number of obs: 159429, groups: mmsa, 118
## AIC = 147379, DIC = 146562.8
## deviance = 146968.7
# The ICC is 0.072, calculated by the equation: (.28^2)/(1+.28^2)
# The variance between cities is .28
fit2<-glmer(badhealth~male+agec+educ+(1|mmsa),
family=binomial,merged,
na.action=na.omit)
arm::display(fit2, detail=T)
## glmer(formula = badhealth ~ male + agec + educ + (1 | mmsa),
## data = merged, family = binomial, na.action = na.omit)
## coef.est coef.se z value Pr(>|z|)
## (Intercept) -2.10 0.04 -49.00 0.00
## male -0.07 0.01 -4.77 0.00
## agec(24,39] 0.47 0.04 11.36 0.00
## agec(39,59] 1.05 0.04 27.39 0.00
## agec(59,79] 1.21 0.04 31.83 0.00
## agec(79,99] 1.31 0.04 30.93 0.00
## educ0Prim 0.92 0.04 22.28 0.00
## educ1somehs 0.77 0.03 26.94 0.00
## educ3somecol -0.33 0.02 -19.09 0.00
## educ4colgrad -1.12 0.02 -61.84 0.00
##
## Error terms:
## Groups Name Std.Dev.
## mmsa (Intercept) 0.21
## Residual 1.00
## ---
## number of obs: 159429, groups: mmsa, 118
## AIC = 137845, DIC = 137152.1
## deviance = 137487.4
# The between cities variance decreased to 0.21 after including individual level variables. The ICC is 0.042. According to the model, those with some college or college graduates, have lower odds of reporting poor self-rated health compared to high school graduates. Those with less than high school education are more likely to report poor health.
anova(fit, fit2, test="Chisq")
## Data: merged
## Models:
## fit: badhealth ~ (1 | mmsa)
## fit2: badhealth ~ male + agec + educ + (1 | mmsa)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## fit 2 147379 147399 -73687 147375
## fit2 11 137845 137954 -68911 137823 9551.9 9 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# The inclusion of individual level variables improves the overall model fit. AIC decreased from the base model to the final one, this difference is significant.