Homework 3

library(lme4)

## Loading required package: Matrix

library(arm)

## Loading required package: MASS

## 
## arm (Version 1.10-1, built: 2018-4-12)

## Working directory is C:/Users/Monica/Documents/CSparks_Modeling

library(car)

## Loading required package: carData

## 
## Attaching package: 'car'

## The following object is masked from 'package:arm':
## 
##     logit

library(haven)
library(tidycensus)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following object is masked from 'package:MASS':
## 
##     select

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#load brfss
brfss_12<-read_xpt("C:/Users/Monica/Documents/CSparks_Modeling/CNTY12.xpt")

nams<-names(brfss_12)
newnames<-gsub("_", "", nams)
names(brfss_12)<-make.names(tolower(newnames), unique = T)

brfss_12$statefip<-sprintf("%02d", brfss_12$state )
brfss_12$cofip<-sprintf("%03d", brfss_12$cnty )
brfss_12$cofips<-paste(brfss_12$statefip, brfss_12$cofip, sep="")

#insurance
brfss_12$ins<-ifelse(brfss_12$hlthpln1==1,1,0)
#smoking currently
brfss_12$smoke<-Recode(brfss_12$smoker3, recodes="1:2=1; 3:4=0; else=NA")
#bmi
brfss_12$bmi<-ifelse(is.na(brfss_12$bmi5)==T, NA, brfss_12$bmi5/100)
#poor or fair health
brfss_12$badhealth<-ifelse(brfss_12$genhlth %in% c(4,5),1,0)

#days of poor physical health
brfss_12$poorphyshlth<-Recode(brfss_12$physhlth, recodes="77=NA; 99=NA", as.factor=T)

#race
brfss_12$black<-Recode(brfss_12$racegr2, recodes="2=1; 9=NA; else=0", as.factor=T)
brfss_12$white<-Recode(brfss_12$racegr2, recodes="1=1; 9=NA; else=0", as.factor=T)
brfss_12$other<-Recode(brfss_12$racegr2, recodes="3:4=1; 9=NA; else=0", as.factor=T)
brfss_12$hispanic<-Recode(brfss_12$racegr2, recodes="5=1; 9=NA; else=0", as.factor=T)

#have a personal doctor?
brfss_12$doc<-Recode(brfss_12$persdoc2, recodes="1:2=1; 3=0; else=NA", as.factor=F)

#needed care in last year but couldn't get it because of cost
brfss_12$medacc<-Recode(brfss_12$medcost, recodes="1=1;2=0;else=NA")

#education level
brfss_12$lths<-Recode(brfss_12$educa, recodes="1:3=1;9=NA; else=0", as.factor=F)
brfss_12$coll<-Recode(brfss_12$educa, recodes="5:6=1;9=NA; else=0", as.factor=F)

#employment
brfss_12$employ<-Recode(brfss_12$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
brfss_12$employ<-relevel(brfss_12$employ, ref='Employed')

#marital status
brfss_12$marst<-Recode(brfss_12$marital, recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA", as.factor=T)
brfss_12$marst<-relevel(brfss_12$marst, ref='married')

#income
brfss_12$inc<-as.factor(ifelse(brfss_12$incomg==9, NA, brfss_12$incomg))

#Age cut into intervals
brfss_12$agec<-cut(brfss_12$age, breaks=c(0,24,39,59,79,99))

brfss_12$agez<-as.numeric(scale(brfss_12$age, scale =T,center=T))

brfss_12$bmiz<-as.numeric(scale(brfss_12$bmi, scale = T , center=T))

brfss_12$male<-ifelse(brfss_12$sex==1, 1, 0)
brfss_12<-brfss_12%>%
  #select(!mrace)%>%
  select(cofips,male, agec, agez, bmi, bmiz, black, hispanic, other,  marst, employ, lths, coll, badhealth, poorphyshlth)%>%
  filter(complete.cases(.))

usacs<-get_acs(geography = "county", year = 2011,
                variables=c( "DP05_0001E", "DP03_0009P", "DP03_0062E", "DP03_0119PE",
                             "DP05_0001E","DP02_0009PE","DP02_0008PE", "DP02_0040E","DP02_0038E",
                             "DP02_0066PE","DP02_0067PE","DP02_0080PE","DP02_0092PE",
                             "DP03_0005PE","DP03_0028PE","DP03_0062E","DP03_0099PE","DP03_0101PE",
                             "DP03_0119PE","DP04_0046PE","DP05_0072PE","DP05_0073PE",
                             "DP05_0066PE","DP05_0033","DP05_0032", "DP05_0072PE", "DP02_0113PE",  "DP04_0003PE") ,
                summary_var = "B01001_001",
                geometry = F, output = "wide")

## Getting data from the 2007-2011 5-year ACS

## Using the ACS Data Profile
## Using the ACS Data Profile

usacs<-usacs%>%
  mutate(totpop= DP05_0001E, fertrate = DP02_0040E,pwhite=DP05_0072PE/100, nwhite=DP05_0032E,nblack=DP05_0033E,
         pblack=DP05_0073PE/100 , phisp=DP05_0066PE/100, pfemhh=DP02_0008PE/100,
         phsormore=DP02_0066PE/100,punemp=DP03_0009PE/100, medhhinc=DP03_0062E,
         ppov=DP03_0119PE/100, pforn=DP02_0092PE/100,plep=DP02_0113PE/100,  pvacant=DP04_0003PE/100)%>%
  select(GEOID,NAME, totpop, pwhite, pblack, phisp, pfemhh, phsormore, punemp, medhhinc, ppov, pforn,plep, pvacant)


head(usacs)

## # A tibble: 6 x 14
##   GEOID NAME  totpop pwhite pblack  phisp pfemhh phsormore punemp medhhinc
##   <chr> <chr>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>     <dbl>  <dbl>    <dbl>
## 1 01001 Auta~  53944  0.772  0.18  0.024   0.129     0.865  0.075    53899
## 2 01003 Bald~ 179523  0.838  0.094 0.0410  0.11      0.879  0.077    51321
## 3 01005 Barb~  27546  0.47   0.456 0.049   0.195     0.724  0.13     34041
## 4 01007 Bibb~  22746  0.751  0.226 0.016   0.142     0.759  0.096    40506
## 5 01009 Blou~  57140  0.891  0.013 0.078   0.096     0.732  0.091    45404
## 6 01011 Bull~  10877  0.223  0.7   0.063   0.265     0.72   0.155    31955
## # ... with 4 more variables: ppov <dbl>, pforn <dbl>, plep <dbl>,
## #   pvacant <dbl>

load(url("https://github.com/coreysparks/data/blob/master/segregation_county.Rdata?raw=true"))

#Merge files by county FIPS codes
joindata<-merge(usacs, county_dat, by.x="GEOID", by.y="cofips", all.x=T)

Clean and standardize contextual data

myscale<-function(x){as.numeric(scale(x))}
joindata<-joindata %>%
  mutate_at(c("ppov","punemp", "pblack","phisp", "pfemhh", "phsormore", "medhhinc", "pvacant", "dissim_wb", "isolation_b"),myscale)

joindata<-joindata%>%filter(complete.cases(.))
head(joindata)

##   GEOID                    NAME totpop pwhite      pblack      phisp
## 1 01001 Autauga County, Alabama  53944  0.772  0.65192947 -0.4142209
## 2 01003 Baldwin County, Alabama 179523  0.838  0.05308223 -0.3246504
## 3 01005 Barbour County, Alabama  27546  0.470  2.57381132 -0.2824995
## 4 01007    Bibb County, Alabama  22746  0.751  0.97224311 -0.4563717
## 5 01009  Blount County, Alabama  57140  0.891 -0.51094832 -0.1297028
## 6 01011 Bullock County, Alabama  10877  0.223  4.27286629 -0.2087356
##        pfemhh  phsormore     punemp    medhhinc       ppov pforn  plep
## 1  0.40446605  0.3931710 -0.2226690  0.74342558 -0.6507336 0.020 0.011
## 2 -0.03255107  0.5885862 -0.1722814  0.53687429 -0.4434858 0.037 0.024
## 3  1.92252552 -1.5749400  1.1629895 -0.84761220  0.8647654 0.030 0.020
## 4  0.70347777 -1.0864018  0.3064006 -0.32963157 -0.0937553 0.013 0.008
## 5 -0.35456368 -1.4632741  0.1804317  0.06279984 -0.2880500 0.045 0.042
## 6  3.53258860 -1.6307729  1.7928342 -1.01474407  1.2533549 0.045 0.031
##       pvacant  dissim_wb isolation_b interaction_bw    TheileH
## 1 -0.88086012 -0.0748325   0.7954237      0.6897242 0.08299523
## 2  1.32508397  0.5034438   0.2963891      0.7673910 0.09203860
## 3  0.17373595 -0.6406195   2.0563827      0.4518021 0.04898714
## 4  0.14471037  0.7541075   1.1108393      0.6477188 0.11959518
## 5 -0.57125394  0.4307451  -0.6043052      0.9040929 0.05283605
## 6 -0.07781907 -0.3539192   3.2507893      0.2139202 0.05627216

Now, I merge the data back to the individual level data:

merged<-merge(x=brfss_12, y=joindata, by.x="cofips", by.y="GEOID")
merged<-merged%>%
  filter(complete.cases(.))

Here I fit the random intercept logistic regression:

Research Question: Do individuals who are racial/ethnic minorities more likely than Whites to experience a greater number of poor physical health days?

Model 1 - individual level effects

fit.mix<-lmer(as.numeric(poorphyshlth)~male+agez+lths+coll+black+hispanic+other+(1|cofips), data=merged)

library(MuMIn)
r.squaredGLMM(fit.mix)

## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help
## page.

##              R2m         R2c
## [1,] 0.005570883 0.007601149

#cor(fitted(fit.mix),fit.mix@frame$as.numeric(poorphyshlth)^2)

#Tests of coefficitns
display(fit.mix, detail=T, digits=4)

## lmer(formula = as.numeric(poorphyshlth) ~ male + agez + lths + 
##     coll + black + hispanic + other + (1 | cofips), data = merged)
##             coef.est coef.se  t value 
## (Intercept)  23.4041   0.0704 332.5909
## male          1.2604   0.0524  24.0523
## agez          0.6619   0.0273  24.2301
## lths         -0.5311   0.1131  -4.6981
## coll         -0.2634   0.0608  -4.3337
## black1        0.2306   0.0947   2.4351
## hispanic1     0.4573   0.1051   4.3529
## other1        0.1670   0.1061   1.5741
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept)  0.5247 
##  Residual             11.6015 
## ---
## number of obs: 202449, groups: cofips, 209
## AIC = 1.56724e+06, DIC = 1567160
## deviance = 1567188.9

#confidence intervals for the Odds ratio
exp(confint(fit.mix, method="Wald"))

##                    2.5 %       97.5 %
## .sig01                NA           NA
## .sigma                NA           NA
## (Intercept) 1.271717e+10 1.675664e+10
## male        3.182487e+00 3.908180e+00
## agez        1.837433e+00 2.045112e+00
## lths        4.710886e-01 7.337729e-01
## coll        6.821073e-01 8.656378e-01
## black1      1.046019e+00 1.516194e+00
## hispanic1   1.285801e+00 1.940963e+00
## other1      9.598728e-01 1.455066e+00

Model 2

Here is a model that only considers the macro level variable on the outcome:

fit.mix.macro<-lmer(as.numeric(poorphyshlth)~ppov+pblack+phsormore+(1|cofips),data=merged)


r.squaredGLMM(fit.mix.macro)

##               R2m         R2c
## [1,] 0.0002255069 0.002181967

display(fit.mix.macro, detail=T, digits=4)

## lmer(formula = as.numeric(poorphyshlth) ~ ppov + pblack + phsormore + 
##     (1 | cofips), data = merged)
##             coef.est coef.se  t value 
## (Intercept)  23.7228   0.0644 368.3890
## ppov         -0.5672   0.1463  -3.8765
## pblack        0.1157   0.0574   2.0158
## phsormore    -0.2694   0.1045  -2.5784
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept)  0.5151 
##  Residual             11.6334 
## ---
## number of obs: 202449, groups: cofips, 209
## AIC = 1.56833e+06, DIC = 1568285
## deviance = 1568299.3

#confidence intervals for the Odds ratio
exp(confint(fit.mix.macro, method="Wald"))

##                    2.5 %       97.5 %
## .sig01                NA           NA
## .sigma                NA           NA
## (Intercept) 1.769490e+10 2.277592e+10
## ppov        4.257408e-01 7.554791e-01
## pblack      1.003207e+00 1.256243e+00
## phsormore   6.223573e-01 9.374157e-01

Compare fits using a likelihood ratio test

anova(fit.mix, fit.mix.macro)

## refitting model(s) with ML (instead of REML)

## Data: merged
## Models:
## fit.mix.macro: as.numeric(poorphyshlth) ~ ppov + pblack + phsormore + (1 | cofips)
## fit.mix: as.numeric(poorphyshlth) ~ male + agez + lths + coll + black + 
## fit.mix:     hispanic + other + (1 | cofips)
##               Df     AIC     BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## fit.mix.macro  6 1568311 1568373 -784150  1568299                         
## fit.mix       10 1567209 1567311 -783594  1567189 1110.4      4  < 2.2e-16
##                  
## fit.mix.macro    
## fit.mix       ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Hypothesis: Racial/ethnic minorities living in areas with high poverty rates are more likely to experience poor physical health days than whites.

Estimate a cross-level interaction model

fit.mix.cross<-lmer(as.numeric(poorphyshlth)~male+agez+lths+coll+(black+hispanic+other)*ppov+ (1|cofips), data=merged)
display(fit.mix.cross, detail=T)

## lmer(formula = as.numeric(poorphyshlth) ~ male + agez + lths + 
##     coll + (black + hispanic + other) * ppov + (1 | cofips), 
##     data = merged)
##                coef.est coef.se t value
## (Intercept)     23.36     0.08  283.50 
## male             1.26     0.05   23.99 
## agez             0.66     0.03   24.25 
## lths            -0.52     0.11   -4.58 
## coll            -0.27     0.06   -4.37 
## black1           0.23     0.10    2.27 
## hispanic1        0.42     0.11    3.69 
## other1          -0.06     0.14   -0.41 
## ppov            -0.13     0.10   -1.32 
## black1:ppov     -0.03     0.17   -0.18 
## hispanic1:ppov  -0.23     0.17   -1.37 
## other1:ppov     -0.56     0.20   -2.77 
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  cofips   (Intercept)  0.52   
##  Residual             11.60   
## ---
## number of obs: 202449, groups: cofips, 209
## AIC = 1.56724e+06, DIC = 1567137
## deviance = 1567173.6

exp(confint(fit.mix.cross, method="Wald"))

##                       2.5 %       97.5 %
## .sig01                   NA           NA
## .sigma                   NA           NA
## (Intercept)    1.182692e+10 1.633494e+10
## male           3.173221e+00 3.896925e+00
## agez           1.838839e+00 2.046734e+00
## lths           4.771229e-01 7.433939e-01
## coll           6.804915e-01 8.635975e-01
## black1         1.031983e+00 1.529235e+00
## hispanic1      1.217222e+00 1.899217e+00
## other1         7.260826e-01 1.233537e+00
## ppov           7.251219e-01 1.065154e+00
## black1:ppov    6.878479e-01 1.363276e+00
## hispanic1:ppov 5.643470e-01 1.107680e+00
## other1:ppov    3.832547e-01 8.481125e-01

Homework 3

Monica Cruz

October 7, 2018

Clean and standardize contextual data

Research Question: Do individuals who are racial/ethnic minorities more likely than Whites to experience a greater number of poor physical health days?

Model 1 - individual level effects

Model 2

Compare fits using a likelihood ratio test

Hypothesis: Racial/ethnic minorities living in areas with high poverty rates are more likely to experience poor physical health days than whites.

Estimate a cross-level interaction model

Race/ethnic minorites face a greater risk of experiencing poor physical health in counties that have higher poverty rates.