#Mental health
## I use 2017 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART metro area survey data for this homework. My research question is how various socio-demographic variables may affect the average number of days in a month a person can tell his mental health was not good (which includes stress, depression, and problems with emotions). Therefore, the dependent variable is mental health (count variable measured as a number of days out of 30 days when mental health being not good). Predictors include income and employment status as socio-economic variables, age and health insurance coverage and bmi as health related variables.
brfss_17$men_health<-Recode(brfss_17$menthlth,recodes="77=NA;88=NA;99=NA")
head(brfss_17$men_health)
## [1] NA NA 5 3 NA NA
#income
brfss_17$income<-Recode(brfss_17$income2,recodes="77=NA;99=NA")
head(brfss_17$income)
## [1] 5 8 1 4 6 6
str(brfss_17$income)
## num [1:230875] 5 8 1 4 6 6 NA NA 6 NA ...
## - attr(*, "label")= chr "INCOME LEVEL"
#age; reference group - 18-24 old
brfss_17$age<-Recode(brfss_17$age80,recodes="18:24='Age 18 to 24';
25:29='Age 25 to 29';
30:34='Age 30 to 34';
35:39='Age 35 to 39';
40:44='Age 40 to 44';
45:49='Age 45 to 49';
50:54='Age 50 to 54';
55:59='Age 55 to 59';
60:64='Age 60 to 64';
65:69='Age 65 to 69';
70:74='Age 70 to 74';
75:79='Age 75 to 79';
80:99='Age 80 or older'",as.factor=TRUE)
#bmi
brfss_17$bmi<-brfss_17$bmi5/100
#health_care; reference group - "1" has healthcare coverage
brfss_17$health_care<-Recode(brfss_17$hlthpln1,recodes="7=NA;9=NA",as.factor=TRUE)
levels(brfss_17$health_care)
## [1] "1" "2"
#employment Status; 1 - employed (reference), 2 - unemployed, 3 - other (homemaker, student, retired, unable to work)
table(brfss_17$employ1)
##
## 1 2 3 4 5 6 7 8 9
## 99410 19204 5422 4933 12129 6975 66157 14504 2138
brfss_17$employ<-Recode(brfss_17$employ1,recodes="1:2='Employed';3:4='Unemployed';5:8='Other';9=NA", as.factor=T)
str(brfss_17$employ)
## Factor w/ 3 levels "Employed","Other",..: 1 1 1 1 2 1 1 2 2 2 ...
levels(brfss_17$employ)
## [1] "Employed" "Other" "Unemployed"
##Analysis
sub<-brfss_17%>%
select(men_health, income, bmi,
age,health_care,employ, mmsawt, ststr) %>%
filter( complete.cases(.))
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr,
weights=~mmsawt,
data =sub[is.na(sub$mmsawt)==F,])
#Using Poisson glm fit the survey design
fit1<-svyglm(men_health~income+age+bmi+health_care+employ, design=des, family=poisson)
summary(fit1)
##
## Call:
## svyglm(formula = men_health ~ income + age + bmi + health_care +
## employ, design = des, family = poisson)
##
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = sub[is.na(sub$mmsawt) ==
## F, ])
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.300967 0.044745 51.424 < 2e-16 ***
## income -0.064747 0.003715 -17.428 < 2e-16 ***
## ageAge 25 to 29 0.057943 0.033037 1.754 0.079458 .
## ageAge 30 to 34 0.053423 0.034101 1.567 0.117214
## ageAge 35 to 39 0.075320 0.033328 2.260 0.023828 *
## ageAge 40 to 44 0.063029 0.035320 1.784 0.074351 .
## ageAge 45 to 49 0.066648 0.035262 1.890 0.058755 .
## ageAge 50 to 54 0.139957 0.035569 3.935 8.34e-05 ***
## ageAge 55 to 59 0.142778 0.032574 4.383 1.17e-05 ***
## ageAge 60 to 64 0.117346 0.033649 3.487 0.000488 ***
## ageAge 65 to 69 0.021603 0.040399 0.535 0.592834
## ageAge 70 to 74 -0.044115 0.048290 -0.914 0.360955
## ageAge 75 to 79 -0.085165 0.055015 -1.548 0.121622
## ageAge 80 or older -0.026883 0.049614 -0.542 0.587930
## bmi 0.008518 0.001010 8.436 < 2e-16 ***
## health_care2 0.075150 0.025106 2.993 0.002761 **
## employOther 0.217753 0.020177 10.792 < 2e-16 ***
## employUnemployed 0.226700 0.029143 7.779 7.44e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 9.048215)
##
## Number of Fisher Scoring iterations: 5
#The results show that income has a negative realtionship with average number of days reporting bad mental health. As for age variable the older respondents get the larger is the average number of days with bad mental health (this trend is true until the age of 64), respondents of older age groups have on average fewer bad mental health days than the reference group (but the results are not significant for these groups). Those who do not have an insurance coverage on avearge report more days of bad mental health; unemployed or other categories report on avearge more days of having bad mental health than employed.
fit2<-glm(men_health~income+age+bmi+health_care+employ, data=sub, family=poisson)
summary(fit2)
##
## Call:
## glm(formula = men_health ~ income + age + bmi + health_care +
## employ, family = poisson, data = sub)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.278 -2.717 -1.367 1.697 6.860
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.4492995 0.0075956 322.463 < 2e-16 ***
## income -0.0831352 0.0005953 -139.661 < 2e-16 ***
## ageAge 25 to 29 0.0467084 0.0064293 7.265 3.73e-13 ***
## ageAge 30 to 34 0.0700591 0.0064544 10.854 < 2e-16 ***
## ageAge 35 to 39 0.1006858 0.0063601 15.831 < 2e-16 ***
## ageAge 40 to 44 0.0831107 0.0065464 12.696 < 2e-16 ***
## ageAge 45 to 49 0.0958100 0.0062466 15.338 < 2e-16 ***
## ageAge 50 to 54 0.1277875 0.0059426 21.504 < 2e-16 ***
## ageAge 55 to 59 0.1391562 0.0057667 24.131 < 2e-16 ***
## ageAge 60 to 64 0.0857108 0.0058879 14.557 < 2e-16 ***
## ageAge 65 to 69 0.0119522 0.0062354 1.917 0.0553 .
## ageAge 70 to 74 -0.0481779 0.0069511 -6.931 4.18e-12 ***
## ageAge 75 to 79 -0.1086489 0.0082983 -13.093 < 2e-16 ***
## ageAge 80 or older -0.0905993 0.0080089 -11.312 < 2e-16 ***
## bmi 0.0066148 0.0001750 37.808 < 2e-16 ***
## health_care2 0.0867025 0.0042977 20.174 < 2e-16 ***
## employOther 0.2257106 0.0032301 69.877 < 2e-16 ***
## employUnemployed 0.2086925 0.0050497 41.327 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 553509 on 60043 degrees of freedom
## Residual deviance: 506161 on 60026 degrees of freedom
## AIC: 728751
##
## Number of Fisher Scoring iterations: 5
scale<-sqrt(fit2$deviance/fit2$df.residual)
scale
## [1] 2.903854
1-pchisq(fit2$deviance, df = fit2$df.residual)
## [1] 0
##Regular glm does not fit the data well, since p value is 0 when we use deviance to test the model fit.
fit3<-glm(men_health~income+age+bmi+health_care+employ, data=sub, family=quasipoisson)
summary(fit3)
##
## Call:
## glm(formula = men_health ~ income + age + bmi + health_care +
## employ, family = quasipoisson, data = sub)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -6.278 -2.717 -1.367 1.697 6.860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.4492995 0.0229795 106.586 < 2e-16 ***
## income -0.0831352 0.0018009 -46.163 < 2e-16 ***
## ageAge 25 to 29 0.0467084 0.0194509 2.401 0.016338 *
## ageAge 30 to 34 0.0700591 0.0195269 3.588 0.000334 ***
## ageAge 35 to 39 0.1006858 0.0192415 5.233 1.68e-07 ***
## ageAge 40 to 44 0.0831107 0.0198051 4.196 2.72e-05 ***
## ageAge 45 to 49 0.0958100 0.0188982 5.070 3.99e-07 ***
## ageAge 50 to 54 0.1277875 0.0179785 7.108 1.19e-12 ***
## ageAge 55 to 59 0.1391562 0.0174462 7.976 1.53e-15 ***
## ageAge 60 to 64 0.0857108 0.0178130 4.812 1.50e-06 ***
## ageAge 65 to 69 0.0119522 0.0188644 0.634 0.526354
## ageAge 70 to 74 -0.0481779 0.0210295 -2.291 0.021969 *
## ageAge 75 to 79 -0.1086489 0.0251054 -4.328 1.51e-05 ***
## ageAge 80 or older -0.0905993 0.0242299 -3.739 0.000185 ***
## bmi 0.0066148 0.0005293 12.497 < 2e-16 ***
## health_care2 0.0867025 0.0130020 6.668 2.61e-11 ***
## employOther 0.2257106 0.0097723 23.097 < 2e-16 ***
## employUnemployed 0.2086925 0.0152773 13.660 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 9.152833)
##
## Null deviance: 553509 on 60043 degrees of freedom
## Residual deviance: 506161 on 60026 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 5
##We use quasi poisson distribution to scale the variance and get the results very similar to survey design model (however, coefficients are more significant than in survey).
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
coeftest(fit2, vcov=vcovHC(fit2, type="HC1",cluster="ststr"))
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.44929954 0.02268729 107.9591 < 2.2e-16 ***
## income -0.08313517 0.00178003 -46.7043 < 2.2e-16 ***
## ageAge 25 to 29 0.04670836 0.01889952 2.4714 0.0134584 *
## ageAge 30 to 34 0.07005909 0.01915658 3.6572 0.0002550 ***
## ageAge 35 to 39 0.10068576 0.01888837 5.3306 9.791e-08 ***
## ageAge 40 to 44 0.08311071 0.01928458 4.3097 1.635e-05 ***
## ageAge 45 to 49 0.09580996 0.01840718 5.2050 1.940e-07 ***
## ageAge 50 to 54 0.12778755 0.01748185 7.3097 2.677e-13 ***
## ageAge 55 to 59 0.13915616 0.01692073 8.2240 < 2.2e-16 ***
## ageAge 60 to 64 0.08571079 0.01753926 4.8868 1.025e-06 ***
## ageAge 65 to 69 0.01195216 0.01879096 0.6361 0.5247378
## ageAge 70 to 74 -0.04817792 0.02128211 -2.2638 0.0235879 *
## ageAge 75 to 79 -0.10864890 0.02560260 -4.2437 2.199e-05 ***
## ageAge 80 or older -0.09059926 0.02477666 -3.6566 0.0002555 ***
## bmi 0.00661480 0.00051901 12.7451 < 2.2e-16 ***
## health_care2 0.08670253 0.01278110 6.7837 1.172e-11 ***
## employOther 0.22571058 0.00987013 22.8681 < 2.2e-16 ***
## employUnemployed 0.20869251 0.01494534 13.9637 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Negative binomial results are very close to complex survey design.