library(lme4)
## Loading required package: Matrix
library(arm)
## Loading required package: MASS
##
## arm (Version 1.10-1, built: 2018-4-12)
## Working directory is C:/Users/Monica/Documents/CSparks_Modeling
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:arm':
##
## logit
library(haven)
library(tidycensus)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#load brfss
brfss_12<-read_xpt("C:/Users/Monica/Documents/CSparks_Modeling/CNTY12.xpt")
nams<-names(brfss_12)
newnames<-gsub("_", "", nams)
names(brfss_12)<-make.names(tolower(newnames), unique = T)
brfss_12$statefip<-sprintf("%02d", brfss_12$state )
brfss_12$cofip<-sprintf("%03d", brfss_12$cnty )
brfss_12$cofips<-paste(brfss_12$statefip, brfss_12$cofip, sep="")
#insurance
brfss_12$ins<-ifelse(brfss_12$hlthpln1==1,1,0)
#smoking currently
brfss_12$smoke<-Recode(brfss_12$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#bmi
brfss_12$bmi<-ifelse(is.na(brfss_12$bmi5)==T, NA, brfss_12$bmi5/100)
#poor or fair health
brfss_12$badhealth<-ifelse(brfss_12$genhlth %in% c(4,5),1,0)
#days of poor physical health
brfss_12$poorphyshlth<-Recode(brfss_12$physhlth, recodes="77=NA; 99=NA", as.factor=T)
#race
brfss_12$black<-Recode(brfss_12$racegr2, recodes="2=1; 9=NA; else=0", as.factor=T)
brfss_12$white<-Recode(brfss_12$racegr2, recodes="1=1; 9=NA; else=0", as.factor=T)
brfss_12$other<-Recode(brfss_12$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor=T)
brfss_12$hispanic<-Recode(brfss_12$racegr2, recodes="5=1; 9=NA; else=0", as.factor=T)
#have a personal doctor?
brfss_12$doc<-Recode(brfss_12$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor=F)
#needed care in last year but couldn't get it because of cost
brfss_12$medacc<-Recode(brfss_12$medcost, recodes="1=1;2=0;else=NA")
#education level
brfss_12$lths<-Recode(brfss_12$educa, recodes="1:3=1;9=NA; else=0", as.factor=F)
brfss_12$coll<-Recode(brfss_12$educa, recodes="5:6=1;9=NA; else=0", as.factor=F)
#employment
brfss_12$employ<-Recode(brfss_12$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
brfss_12$employ<-relevel(brfss_12$employ, ref='Employed')
#marital status
brfss_12$marst<-Recode(brfss_12$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_12$marst<-relevel(brfss_12$marst, ref='married')
#income
brfss_12$inc<-as.factor(ifelse(brfss_12$incomg==9, NA, brfss_12$incomg))
#Age cut into intervals
brfss_12$agec<-cut(brfss_12$age, breaks=c(0,24,39,59,79,99))
brfss_12$agez<-as.numeric(scale(brfss_12$age, scale =T,center=T))
brfss_12$bmiz<-as.numeric(scale(brfss_12$bmi, scale = T , center=T))
brfss_12$male<-ifelse(brfss_12$sex==1, 1, 0)
brfss_12<-brfss_12%>%
#select(!mrace)%>%
select(cofips,male, agec, agez, bmi, bmiz, black, hispanic, other, marst, employ, lths, coll, badhealth, poorphyshlth)%>%
filter(complete.cases(.))
usacs<-get_acs(geography = "county", year = 2011,
variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
"DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
"DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
"DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
"DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
"DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE", "DP02_0113PE", "DP04_0003PE") ,
summary_var = "B01001_001",
geometry = F, output = "wide")
## Getting data from the 2007-2011 5-year ACS
## Using the ACS Data Profile
## Using the ACS Data Profile
usacs<-usacs%>%
mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100, nwhite=DP05_0032E,nblack=DP05_0033E,
pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100,
phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,
ppov=DP03_0119PE/100, pforn=DP02_0092PE/100,plep=DP02_0113PE/100, pvacant=DP04_0003PE/100)%>%
select(GEOID,NAME, totpop, pwhite, pblack, phisp, pfemhh, phsormore, punemp, medhhinc, ppov, pforn,plep, pvacant)
head(usacs)
## # A tibble: 6 x 14
## GEOID NAME totpop pwhite pblack phisp pfemhh phsormore punemp medhhinc
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 01001 Auta~ 53944 0.772 0.18 0.024 0.129 0.865 0.075 53899
## 2 01003 Bald~ 179523 0.838 0.094 0.0410 0.11 0.879 0.077 51321
## 3 01005 Barb~ 27546 0.47 0.456 0.049 0.195 0.724 0.13 34041
## 4 01007 Bibb~ 22746 0.751 0.226 0.016 0.142 0.759 0.096 40506
## 5 01009 Blou~ 57140 0.891 0.013 0.078 0.096 0.732 0.091 45404
## 6 01011 Bull~ 10877 0.223 0.7 0.063 0.265 0.72 0.155 31955
## # ... with 4 more variables: ppov <dbl>, pforn <dbl>, plep <dbl>,
## # pvacant <dbl>
load(url("https://github.com/coreysparks/data/blob/master/segregation_county.Rdata?raw=true"))
#Merge files by county FIPS codes
joindata<-merge(usacs, county_dat, by.x="GEOID", by.y="cofips", all.x=T)
myscale<-function(x){as.numeric(scale(x))}
joindata<-joindata %>%
mutate_at(c("ppov","punemp", "pblack","phisp", "pfemhh", "phsormore", "medhhinc", "pvacant", "dissim_wb", "isolation_b"),myscale)
joindata<-joindata%>%filter(complete.cases(.))
head(joindata)
## GEOID NAME totpop pwhite pblack phisp
## 1 01001 Autauga County, Alabama 53944 0.772 0.65192947 -0.4142209
## 2 01003 Baldwin County, Alabama 179523 0.838 0.05308223 -0.3246504
## 3 01005 Barbour County, Alabama 27546 0.470 2.57381132 -0.2824995
## 4 01007 Bibb County, Alabama 22746 0.751 0.97224311 -0.4563717
## 5 01009 Blount County, Alabama 57140 0.891 -0.51094832 -0.1297028
## 6 01011 Bullock County, Alabama 10877 0.223 4.27286629 -0.2087356
## pfemhh phsormore punemp medhhinc ppov pforn plep
## 1 0.40446605 0.3931710 -0.2226690 0.74342558 -0.6507336 0.020 0.011
## 2 -0.03255107 0.5885862 -0.1722814 0.53687429 -0.4434858 0.037 0.024
## 3 1.92252552 -1.5749400 1.1629895 -0.84761220 0.8647654 0.030 0.020
## 4 0.70347777 -1.0864018 0.3064006 -0.32963157 -0.0937553 0.013 0.008
## 5 -0.35456368 -1.4632741 0.1804317 0.06279984 -0.2880500 0.045 0.042
## 6 3.53258860 -1.6307729 1.7928342 -1.01474407 1.2533549 0.045 0.031
## pvacant dissim_wb isolation_b interaction_bw TheileH
## 1 -0.88086012 -0.0748325 0.7954237 0.6897242 0.08299523
## 2 1.32508397 0.5034438 0.2963891 0.7673910 0.09203860
## 3 0.17373595 -0.6406195 2.0563827 0.4518021 0.04898714
## 4 0.14471037 0.7541075 1.1108393 0.6477188 0.11959518
## 5 -0.57125394 0.4307451 -0.6043052 0.9040929 0.05283605
## 6 -0.07781907 -0.3539192 3.2507893 0.2139202 0.05627216
Now, I merge the data back to the individual level data:
merged<-merge(x=brfss_12, y=joindata, by.x="cofips", by.y="GEOID")
merged<-merged%>%
filter(complete.cases(.))
Here I fit the random intercept logistic regression:
fit.mix<-lmer(as.numeric(poorphyshlth)~male+agez+lths+coll+black+hispanic+other+(1|cofips), data=merged)
library(MuMIn)
r.squaredGLMM(fit.mix)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help
## page.
## R2m R2c
## [1,] 0.005570883 0.007601149
#cor(fitted(fit.mix),fit.mix@frame$as.numeric(poorphyshlth)^2)
#Tests of coefficitns
display(fit.mix, detail=T, digits=4)
## lmer(formula = as.numeric(poorphyshlth) ~ male + agez + lths +
## coll + black + hispanic + other + (1 | cofips), data = merged)
## coef.est coef.se t value
## (Intercept) 23.4041 0.0704 332.5909
## male 1.2604 0.0524 24.0523
## agez 0.6619 0.0273 24.2301
## lths -0.5311 0.1131 -4.6981
## coll -0.2634 0.0608 -4.3337
## black1 0.2306 0.0947 2.4351
## hispanic1 0.4573 0.1051 4.3529
## other1 0.1670 0.1061 1.5741
##
## Error terms:
## Groups Name Std.Dev.
## cofips (Intercept) 0.5247
## Residual 11.6015
## ---
## number of obs: 202449, groups: cofips, 209
## AIC = 1.56724e+06, DIC = 1567160
## deviance = 1567188.9
#confidence intervals for the Odds ratio
exp(confint(fit.mix, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 1.271717e+10 1.675664e+10
## male 3.182487e+00 3.908180e+00
## agez 1.837433e+00 2.045112e+00
## lths 4.710886e-01 7.337729e-01
## coll 6.821073e-01 8.656378e-01
## black1 1.046019e+00 1.516194e+00
## hispanic1 1.285801e+00 1.940963e+00
## other1 9.598728e-01 1.455066e+00
Here is a model that only considers the macro level variable on the outcome:
fit.mix.macro<-lmer(as.numeric(poorphyshlth)~ppov+pblack+phsormore+(1|cofips),data=merged)
r.squaredGLMM(fit.mix.macro)
## R2m R2c
## [1,] 0.0002255069 0.002181967
display(fit.mix.macro, detail=T, digits=4)
## lmer(formula = as.numeric(poorphyshlth) ~ ppov + pblack + phsormore +
## (1 | cofips), data = merged)
## coef.est coef.se t value
## (Intercept) 23.7228 0.0644 368.3890
## ppov -0.5672 0.1463 -3.8765
## pblack 0.1157 0.0574 2.0158
## phsormore -0.2694 0.1045 -2.5784
##
## Error terms:
## Groups Name Std.Dev.
## cofips (Intercept) 0.5151
## Residual 11.6334
## ---
## number of obs: 202449, groups: cofips, 209
## AIC = 1.56833e+06, DIC = 1568285
## deviance = 1568299.3
#confidence intervals for the Odds ratio
exp(confint(fit.mix.macro, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 1.769490e+10 2.277592e+10
## ppov 4.257408e-01 7.554791e-01
## pblack 1.003207e+00 1.256243e+00
## phsormore 6.223573e-01 9.374157e-01
anova(fit.mix, fit.mix.macro)
## refitting model(s) with ML (instead of REML)
## Data: merged
## Models:
## fit.mix.macro: as.numeric(poorphyshlth) ~ ppov + pblack + phsormore + (1 | cofips)
## fit.mix: as.numeric(poorphyshlth) ~ male + agez + lths + coll + black +
## fit.mix: hispanic + other + (1 | cofips)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## fit.mix.macro 6 1568311 1568373 -784150 1568299
## fit.mix 10 1567209 1567311 -783594 1567189 1110.4 4 < 2.2e-16
##
## fit.mix.macro
## fit.mix ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.mix.cross<-lmer(as.numeric(poorphyshlth)~male+agez+lths+coll+(black+hispanic+other)*ppov+ (1|cofips), data=merged)
display(fit.mix.cross, detail=T)
## lmer(formula = as.numeric(poorphyshlth) ~ male + agez + lths +
## coll + (black + hispanic + other) * ppov + (1 | cofips),
## data = merged)
## coef.est coef.se t value
## (Intercept) 23.36 0.08 283.50
## male 1.26 0.05 23.99
## agez 0.66 0.03 24.25
## lths -0.52 0.11 -4.58
## coll -0.27 0.06 -4.37
## black1 0.23 0.10 2.27
## hispanic1 0.42 0.11 3.69
## other1 -0.06 0.14 -0.41
## ppov -0.13 0.10 -1.32
## black1:ppov -0.03 0.17 -0.18
## hispanic1:ppov -0.23 0.17 -1.37
## other1:ppov -0.56 0.20 -2.77
##
## Error terms:
## Groups Name Std.Dev.
## cofips (Intercept) 0.52
## Residual 11.60
## ---
## number of obs: 202449, groups: cofips, 209
## AIC = 1.56724e+06, DIC = 1567137
## deviance = 1567173.6
exp(confint(fit.mix.cross, method="Wald"))
## 2.5 % 97.5 %
## .sig01 NA NA
## .sigma NA NA
## (Intercept) 1.182692e+10 1.633494e+10
## male 3.173221e+00 3.896925e+00
## agez 1.838839e+00 2.046734e+00
## lths 4.771229e-01 7.433939e-01
## coll 6.804915e-01 8.635975e-01
## black1 1.031983e+00 1.529235e+00
## hispanic1 1.217222e+00 1.899217e+00
## other1 7.260826e-01 1.233537e+00
## ppov 7.251219e-01 1.065154e+00
## black1:ppov 6.878479e-01 1.363276e+00
## hispanic1:ppov 5.643470e-01 1.107680e+00
## other1:ppov 3.832547e-01 8.481125e-01