#load brfss
library(car)
library(stargazer)
library(survey)
library(ggplot2)
load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
#1) The count outcome is depression. The question asked is "(Ever told) you have a depressive disorder (including depression, major depression, dysthymia, or minor depression)?
Do veterans have depression more than non-veterans? This does not need an offset because every person in the survey was askd the sam equestions.
#outcome variable is Depression
brfss_17$Depression<-Recode(brfss_17$addepev2, recodes = "7=NA; 9=NA")
hist(brfss_17$Depression)
summary(brfss_17$Depression)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1.000 2.000 2.000 1.804 2.000 2.000 1140
#Veteran status
brfss_17$vetstatus<-Recode(brfss_17$veteran3, recodes="1= 'Veteran'; 2= 'Nonveteran'; else=NA", as.factor = T)
brfss_17$vetstatus<-relevel(brfss_17$vetstatus, ref = "Veteran")
#brfss_17$badhealth<-ifelse(brfss_17$genhlth %in% c(4,5),1,0)
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
brfss_17$race_eth<-Recode(brfss_17$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
brfss_17$race_eth<-relevel(brfss_17$race_eth, ref = "nhwhite")
#insurance
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#income grouping
brfss_17$inc<-ifelse(brfss_17$incomg==9, NA, brfss_17$incomg)
#education level
brfss_17$educ<-Recode(brfss_17$educa,
recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA",
as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')
#employment
brfss_17$employ<-Recode(brfss_17$employ1,
recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA",
as.factor=T)
brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')
#marital status
brfss_17$marst<-Recode(brfss_17$marital,
recodes="1='married'; 2='divorced'; 3='widowed'; 4='separated'; 5='nm';6='cohab'; else=NA",
as.factor=T)
brfss_17$marst<-relevel(brfss_17$marst, ref='married')
#Age cut into intervals
brfss_17$agec<-cut(brfss_17$age80, breaks=c(0,24,39,59,79,99))
#BMI, in the brfss_17a the bmi variable has 2 implied decimal places,
#so we must divide by 100 to get real bmi's
brfss_17$bmi<-brfss_17$bmi5/100
#smoking currently
brfss_17$smoke<-Recode(brfss_17$smoker3,
recodes="1:2='Current'; 3='Former';4='NeverSmoked'; else=NA",
as.factor=T)
brfss_17$smoke<-relevel(brfss_17$smoke, ref = "NeverSmoked")
#Here I keep complete cases on my key variables,
#just for speed (the survey procedures can run for a long time)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
sub<-brfss_17%>%
select(Depression, vetstatus,
agec,race_eth, marst, educ,white, black, hispanic,
other, ins, mmsawt, ststr) %>%
filter( complete.cases(.))
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data =sub )
#OR THE BRFSS, R GAVE ME A WARNING AND I NEEDED TO ADD:
#YOU MAY NOT NEED TO DO THIS!!!!
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr,
weights=~mmsawt,
data = brfss_17[is.na(brfss_17$mmsawt)==F,])
svyhist(~Depression, des)
svyby(~Depression, ~race_eth+educ, des, svymean, na.rm=T)
## race_eth educ Depression se
## nhwhite.2hsgrad nhwhite 2hsgrad 1.784797 0.004328881
## hispanic.2hsgrad hispanic 2hsgrad 1.867142 0.008439934
## nh black.2hsgrad nh black 2hsgrad 1.844332 0.008613719
## nh multirace.2hsgrad nh multirace 2hsgrad 1.687759 0.029969545
## nh other.2hsgrad nh other 2hsgrad 1.871327 0.015959179
## nhwhite.0Prim nhwhite 0Prim 1.737838 0.023072885
## hispanic.0Prim hispanic 0Prim 1.867133 0.010537138
## nh black.0Prim nh black 0Prim 1.786697 0.032510736
## nh multirace.0Prim nh multirace 0Prim 1.641944 0.091136662
## nh other.0Prim nh other 0Prim 1.879223 0.040643220
## nhwhite.1somehs nhwhite 1somehs 1.681027 0.012785141
## hispanic.1somehs hispanic 1somehs 1.858145 0.011125185
## nh black.1somehs nh black 1somehs 1.744416 0.020865539
## nh multirace.1somehs nh multirace 1somehs 1.637382 0.061932560
## nh other.1somehs nh other 1somehs 1.675660 0.065141753
## nhwhite.3somecol nhwhite 3somecol 1.769938 0.003978383
## hispanic.3somecol hispanic 3somecol 1.821423 0.009926594
## nh black.3somecol nh black 3somecol 1.844672 0.009317801
## nh multirace.3somecol nh multirace 3somecol 1.747794 0.020142887
## nh other.3somecol nh other 3somecol 1.840616 0.019249206
## nhwhite.4colgrad nhwhite 4colgrad 1.830927 0.002416563
## hispanic.4colgrad hispanic 4colgrad 1.877249 0.007322868
## nh black.4colgrad nh black 4colgrad 1.896054 0.005971260
## nh multirace.4colgrad nh multirace 4colgrad 1.770049 0.022425456
## nh other.4colgrad nh other 4colgrad 1.907185 0.009541618
svyby(~Depression, ~agec, des, svymean, na.rm=T)
## agec Depression se
## (0,24] (0,24] 1.816360 0.005550975
## (24,39] (24,39] 1.819274 0.003274101
## (39,59] (39,59] 1.808873 0.002946819
## (59,79] (59,79] 1.813606 0.003334247
## (79,99] (79,99] 1.890416 0.006993593
svyby(~Depression, ~vetstatus, des, svymean, na.rm=T)
## vetstatus Depression se
## Veteran Veteran 1.821962 0.005489156
## Nonveteran Nonveteran 1.816495 0.001786697
#Poisson glm fit to survey data
fit1<-svyglm(Depression~factor(race_eth)+factor(educ)+factor(agec), design=des, family=poisson)
summary(fit1)
##
## Call:
## svyglm(formula = Depression ~ factor(race_eth) + factor(educ) +
## factor(agec), design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = brfss_17[is.na(brfss_17$mmsawt) ==
## F, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.577168 0.003664 157.538 < 2e-16 ***
## factor(race_eth)hispanic 0.049362 0.002970 16.618 < 2e-16 ***
## factor(race_eth)nh black 0.037690 0.002937 12.831 < 2e-16 ***
## factor(race_eth)nh multirace -0.028501 0.007811 -3.649 0.000263 ***
## factor(race_eth)nh other 0.044133 0.004481 9.849 < 2e-16 ***
## factor(educ)0Prim -0.008152 0.005501 -1.482 0.138370
## factor(educ)1somehs -0.038697 0.005071 -7.631 2.34e-14 ***
## factor(educ)3somecol -0.008130 0.002702 -3.009 0.002620 **
## factor(educ)4colgrad 0.026397 0.002280 11.579 < 2e-16 ***
## factor(agec)(24,39] -0.004870 0.003605 -1.351 0.176669
## factor(agec)(39,59] -0.006623 0.003526 -1.878 0.060380 .
## factor(agec)(59,79] 0.003123 0.003714 0.841 0.400438
## factor(agec)(79,99] 0.050374 0.005028 10.018 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 0.08173664)
##
## Number of Fisher Scoring iterations: 4
#here are the poisson model "risk ratios", which just show the change in the mean
round(exp(summary(fit1)$coef[-1,1]), 3)
## factor(race_eth)hispanic factor(race_eth)nh black
## 1.051 1.038
## factor(race_eth)nh multirace factor(race_eth)nh other
## 0.972 1.045
## factor(educ)0Prim factor(educ)1somehs
## 0.992 0.962
## factor(educ)3somecol factor(educ)4colgrad
## 0.992 1.027
## factor(agec)(24,39] factor(agec)(39,59]
## 0.995 0.993
## factor(agec)(59,79] factor(agec)(79,99]
## 1.003 1.052
#2) Consider a Poisson regression model for the outcomea. Evaluate the level of dispersion in the outcomeb. Is the Poisson model a good choice?
fit2<-glm(Depression~factor(race_eth)+factor(educ)+factor(agec), data=brfss_17, family=poisson)
summary(fit2)
##
## Call:
## glm(formula = Depression ~ factor(race_eth) + factor(educ) +
## factor(agec), family = poisson, data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.78671 0.08718 0.13098 0.15835 0.28078
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.572589 0.006942 82.486 < 2e-16 ***
## factor(race_eth)hispanic 0.035217 0.005721 6.156 7.46e-10 ***
## factor(race_eth)nh black 0.037540 0.005213 7.201 5.96e-13 ***
## factor(race_eth)nh multirace -0.030867 0.012105 -2.550 0.0108 *
## factor(race_eth)nh other 0.038391 0.007894 4.863 1.15e-06 ***
## factor(educ)0Prim -0.012931 0.011436 -1.131 0.2582
## factor(educ)1somehs -0.041253 0.008582 -4.807 1.54e-06 ***
## factor(educ)3somecol -0.006808 0.004419 -1.541 0.1234
## factor(educ)4colgrad 0.026491 0.004059 6.527 6.73e-11 ***
## factor(agec)(24,39] -0.010687 0.007386 -1.447 0.1479
## factor(agec)(39,59] -0.012658 0.006960 -1.819 0.0690 .
## factor(agec)(59,79] 0.006461 0.006902 0.936 0.3492
## factor(agec)(79,99] 0.058271 0.008467 6.882 5.90e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 22582 on 224369 degrees of freedom
## Residual deviance: 22273 on 224357 degrees of freedom
## (6505 observations deleted due to missingness)
## AIC: 581669
##
## Number of Fisher Scoring iterations: 4
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 0.315076
fit3<-glm(Depression~factor(race_eth)+factor(educ)+factor(agec), data=brfss_17, family=quasipoisson)
summary(fit3)
##
## Call:
## glm(formula = Depression ~ factor(race_eth) + factor(educ) +
## factor(agec), family = quasipoisson, data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.78671 0.08718 0.13098 0.15835 0.28078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.572589 0.002044 280.105 < 2e-16 ***
## factor(race_eth)hispanic 0.035217 0.001685 20.904 < 2e-16 ***
## factor(race_eth)nh black 0.037540 0.001535 24.455 < 2e-16 ***
## factor(race_eth)nh multirace -0.030867 0.003565 -8.659 < 2e-16 ***
## factor(race_eth)nh other 0.038391 0.002325 16.515 < 2e-16 ***
## factor(educ)0Prim -0.012931 0.003368 -3.839 0.000123 ***
## factor(educ)1somehs -0.041253 0.002527 -16.322 < 2e-16 ***
## factor(educ)3somecol -0.006808 0.001301 -5.231 1.69e-07 ***
## factor(educ)4colgrad 0.026491 0.001195 22.163 < 2e-16 ***
## factor(agec)(24,39] -0.010687 0.002175 -4.914 8.95e-07 ***
## factor(agec)(39,59] -0.012658 0.002050 -6.176 6.60e-10 ***
## factor(agec)(59,79] 0.006461 0.002032 3.179 0.001480 **
## factor(agec)(79,99] 0.058271 0.002493 23.370 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.08671977)
##
## Null deviance: 22582 on 224369 degrees of freedom
## Residual deviance: 22273 on 224357 degrees of freedom
## (6505 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 4
#3) Consider a Negative binomial model
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(lmtest)
## Warning: package 'lmtest' was built under R version 3.6.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.6.3
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
## Warning: package 'sandwich' was built under R version 3.6.3
fit.nb1<-glm.nb(Depression~factor(race_eth),
data=sub,
weights=mmsawt/mean(mmsawt, na.rm=T))
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
fit.nb2<-glm.nb(Depression~factor(race_eth)+factor(educ)+factor(agec)+factor(vetstatus),
data=sub,
weights=mmsawt/mean(mmsawt, na.rm=T))
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
## Warning in glm.nb(Depression ~ factor(race_eth) + factor(educ) + factor(agec)
## + : alternation limit reached
#clx2(fit.nb2,cluster =sub$ststr)
tests1<-coeftest(fit.nb1, vcov=vcovHC(fit.nb2, type="HC1",cluster="ststr"))
tests<-coeftest(fit.nb2, vcov=vcovHC(fit.nb2, type="HC1",cluster="ststr"))
library(stargazer)
stargazer(fit.nb1, fit.nb2,style="demography", type = "text", t.auto=F,p.auto=F,coef=list(tests1[, 1],tests[,1]), se =list(tests1[, 2], tests[, 2]), p=list(tests1[,4],tests[, 4]) )
##
## --------------------------------------------------------------------------------
## Depression
## Model 1 Model 2
## --------------------------------------------------------------------------------
## factor(race_eth)hispanic 0.036*** 0.049***
## (0.003) (0.003)
## factor(race_eth)nh black 0.030*** 0.038***
## (0.003) (0.003)
## factor(race_eth)nh multirace -0.035*** -0.029***
## (0.008) (0.008)
## factor(race_eth)nh other 0.047*** 0.045***
## (0.004) (0.004)
## factor(educ)0Prim -0.008
## (0.006)
## factor(educ)1somehs -0.037***
## (0.005)
## factor(educ)3somecol -0.008**
## (0.003)
## factor(educ)4colgrad 0.027***
## (0.002)
## factor(agec)(24,39] -0.004
## (0.004)
## factor(agec)(39,59] -0.006
## (0.004)
## factor(agec)(59,79] 0.004
## (0.004)
## factor(agec)(79,99] 0.051***
## (0.005)
## factor(vetstatus)Nonveteran -0.004
## (0.003)
## Constant 0.582*** 0.579***
## (0.005) (0.005)
## N 222,225 222,225
## Log Likelihood -288,399.600 -288,313.200
## theta 381,536.400 (210,793.400) 382,303.500 (211,324.300)
## AIC 576,809.200 576,654.300
## --------------------------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001
#4) Comparing the models, the AIC of the negative binomial model is lower which suggest that it is the better fit.
More veterans have depression than nonveterans.