## QUESTION 1) For this assignment we will look at self reported mental health (reported by "bad" days in a month) and how it is influenced by marriage, sexual orientation, self reported stress (Likert scale) and self reported depression (yes or no).
#Load packages required
# a) How does marriage and sexual orientation influence an individuals mental health?
# b) An offset term is not necessary. You use an offset term when the units of observation are different in some dimension, such as count comparisons over different times.
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(foreign)
brfss2017=read.xport("~/Desktop/DataFolder/brfss.XPT ")
#Rename variables
brfss2017$marital<-Recode(brfss2017$MARITAL,recodes="1=1;5=0;else=NA",as.factor=T)
brfss2017$sxorient<-Recode(brfss2017$SXORIENT,recodes="1='a) Straight';2='b) Gay';else=NA")
brfss2017$genh<-Recode(brfss2017$GENHLTH,recodes="1:3=0;4:5=1;else=NA")
brfss2017$int<-interaction(brfss2017$marital,brfss2017$sxorient)
brfss2017$age<-Recode(brfss2017$X_AGE_G, recodes="1='18-24';2='25-34';3='35-44';4='45-54';5='55-64';6='65+';else=NA",as.factor=T)
brfss2017$educ<-Recode(brfss2017$EDUCA, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss2017$educ<-relevel(brfss2017$educ, ref='0Prim')
brfss2017$hyper<-Recode(brfss2017$BPMEDS,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$choles<-Recode(brfss2017$TOLDHI2,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$blpressure<-Recode(brfss2017$BPMEDS,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$heart<-Recode(brfss2017$CVDCRHD4,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$stroke<-Recode(brfss2017$CVDSTRK3,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$chronic<-Recode(brfss2017$CHCCOPD1,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$depressed<-Recode(brfss2017$ADDEPEV2,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$hlthinsur<-Recode(brfss2017$HLTHPLN1,recodes="1='Y';2='N';else=NA",as.factor=T)
brfss2017$checkup<-Recode(brfss2017$CHECKUP1,recodes="1='1 year';2='2 years';3='5 years';4='5+ years';else=NA",as.factor=T)
brfss2017$stress<-Recode(brfss2017$SDHSTRES,recodes="1='a-None';2='b-Little';3='c-Some';4='d-Most';5='e-Always';else=NA",as.factor=T)
brfss2017$emosupport<-Recode(brfss2017$EMTSUPRT,recodes="1='a-Always';2='b-Usually';3='c-Sometimes';4='d-Rarely';5='e-Never';else=NA",as.factor=T)
brfss2017$satisfed<-Recode(brfss2017$LSATISFY,recodes="1:2='a-Satisfied';3:4='b-Dissatisfied';else=NA",as.factor=T)
brfss2017$safeneigh<-Recode(brfss2017$HOWSAFE1,recodes="1:2='a-Safe';3:4='b-Unsafe';else=NA",as.factor=T)
brfss2017$menthlth<-Recode(brfss2017$MENTHLTH,recodes="88=0;77=NA;99=NA")
brfss2017$educ<-Recode(brfss2017$EDUCA, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss2017$educ<-relevel(brfss2017$educ, ref='0Prim')
brfss2017$age<-Recode(brfss2017$X_AGE_G, recodes="1='18-24';2='25-34';3='35-44';4='45-54';5='55-64';6='65+';else=NA",as.factor=T)
brfss2017$poorh<-Recode(brfss2017$POORHLTH,recodes="88=0;77=NA;99=NA")
brfss2017$physh<-Recode(brfss2017$PHYSHLTH,recodes="88=0;77=NA;99=NA")
brfss2017$menth<-Recode(brfss2017$MENTHLTH,recodes="88=0;77=NA;99=NA")
#Our outcome is self reported mental health.
hist(brfss2017$menthlth)

summary(brfss2017$menthlth)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 0.000 0.000 3.595 2.000 30.000 7203
library(dplyr)
#Choose variables of interest and tell R our survey design
library(dplyr)
sub<-brfss2017%>%
select(menthlth,marital,sxorient,X_LLCPWT,X_STRWT,hyper,choles,blpressure,depressed,stress,emosupport)%>%
filter(complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(ids = ~1,strata=~X_STRWT,weights = ~X_LLCPWT,data = sub)
#descriptive statistics
svyhist(~menthlth,des)

svyby(~menthlth,~sxorient+marital,des,svymean,na.rm=T)
## sxorient marital menthlth se
## a) Straight.0 a) Straight 0 6.773951 0.9743884
## b) Gay.0 b) Gay 0 3.727829 1.6683535
## a) Straight.1 a) Straight 1 2.455619 0.1836451
## b) Gay.1 b) Gay 1 1.476447 0.6146046
# QUESTION 2) Poisson glm fit to survey data
fit1<-svyglm(menthlth~factor(sxorient)+factor(marital)+factor(stress)+factor(depressed),design=des,family=poisson)
summary(fit1)
##
## Call:
## svyglm(formula = menthlth ~ factor(sxorient) + factor(marital) +
## factor(stress) + factor(depressed), design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~X_STRWT, weights = ~X_LLCPWT, data = sub)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.5186 0.3693 1.404 0.160409
## factor(sxorient)b) Gay -0.6468 0.2568 -2.518 0.011840 *
## factor(marital)1 -0.6196 0.1826 -3.393 0.000699 ***
## factor(stress)b-Little 0.4826 0.2791 1.729 0.083806 .
## factor(stress)c-Some 1.0922 0.2746 3.978 7.08e-05 ***
## factor(stress)d-Most 1.8090 0.2799 6.464 1.15e-10 ***
## factor(stress)e-Always 1.9871 0.3252 6.111 1.09e-09 ***
## factor(depressed)Y 0.9928 0.1345 7.383 1.89e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 13.63436)
##
## Number of Fisher Scoring iterations: 7
#"Risk ratios" which show the change in the mean
round(exp(summary(fit1)$coef[-1,1]),3)
## factor(sxorient)b) Gay factor(marital)1 factor(stress)b-Little
## 0.524 0.538 1.620
## factor(stress)c-Some factor(stress)d-Most factor(stress)e-Always
## 2.981 6.104 7.295
## factor(depressed)Y
## 2.699
#a)
fit2<-glm(menthlth~factor(sxorient)+factor(marital)+factor(stress)+factor(depressed),data = brfss2017,family=poisson)
summary(fit2)
##
## Call:
## glm(formula = menthlth ~ factor(sxorient) + factor(marital) +
## factor(stress) + factor(depressed), family = poisson, data = brfss2017)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.427 -1.767 -1.135 -1.047 13.107
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.236443 0.009712 -24.345 < 2e-16 ***
## factor(sxorient)b) Gay 0.096053 0.016635 5.774 7.74e-09 ***
## factor(marital)1 -0.204349 0.006076 -33.631 < 2e-16 ***
## factor(stress)b-Little 0.885911 0.010678 82.968 < 2e-16 ***
## factor(stress)c-Some 1.642011 0.010252 160.172 < 2e-16 ***
## factor(stress)d-Most 2.256092 0.010861 207.719 < 2e-16 ***
## factor(stress)e-Always 2.495321 0.011553 215.989 < 2e-16 ***
## factor(depressed)Y 0.962199 0.006343 151.684 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 411285 on 40880 degrees of freedom
## Residual deviance: 247715 on 40873 degrees of freedom
## (409135 observations deleted due to missingness)
## AIC: 292552
##
## Number of Fisher Scoring iterations: 7
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 2.461831
#The p value is 0 so the model is not a good fit.
1-pchisq(fit2$deviance,df=fit2$df.residual)
## [1] 0
fit3<-glm(menthlth~factor(sxorient)+factor(marital)+factor(stress)+factor(depressed),data = brfss2017,family=quasipoisson)
summary(fit3)
##
## Call:
## glm(formula = menthlth ~ factor(sxorient) + factor(marital) +
## factor(stress) + factor(depressed), family = quasipoisson,
## data = brfss2017)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.427 -1.767 -1.135 -1.047 13.107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.23644 0.03496 -6.762 1.37e-11 ***
## factor(sxorient)b) Gay 0.09605 0.05989 1.604 0.109
## factor(marital)1 -0.20435 0.02187 -9.342 < 2e-16 ***
## factor(stress)b-Little 0.88591 0.03844 23.047 < 2e-16 ***
## factor(stress)c-Some 1.64201 0.03691 44.492 < 2e-16 ***
## factor(stress)d-Most 2.25609 0.03910 57.699 < 2e-16 ***
## factor(stress)e-Always 2.49532 0.04159 59.997 < 2e-16 ***
## factor(depressed)Y 0.96220 0.02284 42.134 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 12.96011)
##
## Null deviance: 411285 on 40880 degrees of freedom
## Residual deviance: 247715 on 40873 degrees of freedom
## (409135 observations deleted due to missingness)
## AIC: NA
##
## Number of Fisher Scoring iterations: 7
# b) the Poisson model is not a good fit.
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2,vcov=vcovHC(fit2,type="HC1",cluster="ststr"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.236443 0.037476 -6.3092 2.804e-10 ***
## factor(sxorient)b) Gay 0.096053 0.050049 1.9192 0.05496 .
## factor(marital)1 -0.204349 0.020130 -10.1514 < 2.2e-16 ***
## factor(stress)b-Little 0.885911 0.041202 21.5017 < 2.2e-16 ***
## factor(stress)c-Some 1.642011 0.040992 40.0565 < 2.2e-16 ***
## factor(stress)d-Most 2.256092 0.043907 51.3838 < 2.2e-16 ***
## factor(stress)e-Always 2.495321 0.045487 54.8580 < 2.2e-16 ***
## factor(depressed)Y 0.962199 0.024348 39.5189 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# QUESTION 3 Consider a negative binomial model
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
fit.nb2<-glm.nb(menthlth~factor(sxorient)+factor(marital)+factor(stress)+factor(depressed),
data = sub,
weights=X_LLCPWT/mean(X_LLCPWT,na.rm=T))
summary(fit.nb2)
##
## Call:
## glm.nb(formula = menthlth ~ factor(sxorient) + factor(marital) +
## factor(stress) + factor(depressed), data = sub, weights = X_LLCPWT/mean(X_LLCPWT,
## na.rm = T), init.theta = 0.1577185586, link = log)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.2131 -0.7515 -0.4993 -0.2074 7.3268
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.7161 0.1193 6.002 1.95e-09 ***
## factor(sxorient)b) Gay -1.2072 0.3441 -3.508 0.000451 ***
## factor(marital)1 -1.0159 0.1150 -8.836 < 2e-16 ***
## factor(stress)b-Little 0.5146 0.1021 5.041 4.62e-07 ***
## factor(stress)c-Some 1.2056 0.1276 9.451 < 2e-16 ***
## factor(stress)d-Most 2.0072 0.1942 10.334 < 2e-16 ***
## factor(stress)e-Always 2.0302 0.2789 7.280 3.35e-13 ***
## factor(depressed)Y 1.1083 0.1140 9.720 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.1577) family taken to be 1)
##
## Null deviance: 3204.8 on 3889 degrees of freedom
## Residual deviance: 2533.4 on 3882 degrees of freedom
## AIC: 12031
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.15772
## Std. Err.: 0.00614
##
## 2 x log-likelihood: -12013.02300
coeftest(fit.nb2,vcov=vcovHC(fit.nb2,type="HC1",cluster="X_STRWT"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.71609 0.48876 1.4651 0.142890
## factor(sxorient)b) Gay -1.20715 0.45477 -2.6544 0.007945 **
## factor(marital)1 -1.01587 0.40009 -2.5391 0.011114 *
## factor(stress)b-Little 0.51464 0.24823 2.0733 0.038148 *
## factor(stress)c-Some 1.20563 0.24679 4.8852 1.033e-06 ***
## factor(stress)d-Most 2.00721 0.26087 7.6944 1.422e-14 ***
## factor(stress)e-Always 2.03015 0.33717 6.0211 1.733e-09 ***
## factor(depressed)Y 1.10827 0.13997 7.9178 2.418e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# QUESTION 4 The negative binomial model gave the lowest AIC.