I use 2016 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART metro area survey data for this research question.
brfss_17$health<-Recode(brfss_17$genhlth, recodes="1:3=1;4:5=0;else=NA")
brfss_17$income<-Recode(brfss_17$income2,recodes="77=NA;99=NA")
brfss_17$income<-Recode(brfss_17$income2, recodes="1='less than $10,000';
2='$10,000-$15,000';
3='$15,000-$20,000';
4='$20,000-$25,000';
5='$25,000-$35,000';
6='$35,000-$50,000';
7='$50,000-$75,000';
8='$75,000 or more'", as.factor=TRUE)
brfss_17$age<-Recode(brfss_17$age80,recodes="18:24='Age 18 to 24';
25:29='Age 25 to 29';
30:34='Age 30 to 34';
35:39='Age 35 to 39';
40:44='Age 40 to 44';
45:49='Age 45 to 49';
50:54='Age 50 to 54';
55:59='Age 55 to 59';
60:64='Age 60 to 64';
65:69='Age 65 to 69';
70:74='Age 70 to 74';
75:79='Age 75 to 79';
80:99='Age 80 or older'",as.factor=TRUE)
brfss_17$bmi<-brfss_17$bmi5/100
table(brfss_17$health,brfss_17$income)
##
## $10,000-$15,000 $15,000-$20,000 $20,000-$25,000 $25,000-$35,000
## 0 3671 4333 4430 4102
## 1 4650 8321 11431 14377
##
## $35,000-$50,000 $50,000-$75,000 $75,000 or more 77 99
## 0 4337 3570 4777 4392 3147
## 1 21440 26445 67161 12071 17787
##
## less than $10,000
## 0 3658
## 1 4490
prop.table(table(brfss_17$health,brfss_17$income),margin=2)
##
## $10,000-$15,000 $15,000-$20,000 $20,000-$25,000 $25,000-$35,000
## 0 0.4411729 0.3424214 0.2793014 0.2219817
## 1 0.5588271 0.6575786 0.7206986 0.7780183
##
## $35,000-$50,000 $50,000-$75,000 $75,000 or more 77 99
## 0 0.1682508 0.1189405 0.0664044 0.2667801 0.1503296
## 1 0.8317492 0.8810595 0.9335956 0.7332199 0.8496704
##
## less than $10,000
## 0 0.4489445
## 1 0.5510555
chisq.test(table(brfss_17$health,brfss_17$income))
##
## Pearson's Chi-squared test
##
## data: table(brfss_17$health, brfss_17$income)
## X-squared = 19674, df = 9, p-value < 2.2e-16
Tables demonstrate that with increase in income the proportion of residents who report good health is growing and the proportion of those reporting bad health is decreasing. According to Pearson’s Chi-squared test results there is a relationsip between income and age. # age
table(brfss_17$health,brfss_17$age)
##
## Age 18 to 24 Age 25 to 29 Age 30 to 34 Age 35 to 39 Age 40 to 44
## 0 1288 1229 1422 1775 1900
## 1 13119 11198 12011 12542 11904
##
## Age 45 to 49 Age 50 to 54 Age 55 to 59 Age 60 to 64 Age 65 to 69
## 0 2555 3646 4741 5317 4972
## 1 14093 16217 17950 19668 20333
##
## Age 70 to 74 Age 75 to 79 Age 80 or older
## 0 4138 3190 4554
## 1 16283 11012 13231
prop.table(table(brfss_17$health,brfss_17$age),margin=2)
##
## Age 18 to 24 Age 25 to 29 Age 30 to 34 Age 35 to 39 Age 40 to 44
## 0 0.08940099 0.09889756 0.10585871 0.12397849 0.13764126
## 1 0.91059901 0.90110244 0.89414129 0.87602151 0.86235874
##
## Age 45 to 49 Age 50 to 54 Age 55 to 59 Age 60 to 64 Age 65 to 69
## 0 0.15347189 0.18355737 0.20893746 0.21280768 0.19648291
## 1 0.84652811 0.81644263 0.79106254 0.78719232 0.80351709
##
## Age 70 to 74 Age 75 to 79 Age 80 or older
## 0 0.20263454 0.22461625 0.25605848
## 1 0.79736546 0.77538375 0.74394152
chisq.test(table(brfss_17$health,brfss_17$age))
##
## Pearson's Chi-squared test
##
## data: table(brfss_17$health, brfss_17$age)
## X-squared = 3761.7, df = 12, p-value < 2.2e-16
According to Pearson’s Chi-squared test results there is a relationsip between health and age
prop.table(wtd.table(brfss_17$health,brfss_17$income, weights = brfss_17$mmsawt),margin=2)
## $10,000-$15,000 $15,000-$20,000 $20,000-$25,000 $25,000-$35,000
## 0 0.40520728 0.30701362 0.26750414 0.21350408
## 1 0.59479272 0.69298638 0.73249586 0.78649592
## $35,000-$50,000 $50,000-$75,000 $75,000 or more 77 99
## 0 0.15613249 0.11971591 0.06694272 0.24950429 0.15037923
## 1 0.84386751 0.88028409 0.93305728 0.75049571 0.84962077
## less than $10,000
## 0 0.39943802
## 1 0.60056198
Compared to the previous case (with no weights) we can see that if we account for weights income increase in income for lower and middle income groups is associated with larger health disparities.
brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1
brfss_17<-brfss_17%>%
filter(tx==1, is.na(mmsawt)==F)
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17 )
fit1<-glm(formula=health~income+age+bmi,data=brfss_17,family=binomial)
summary(fit1)
##
## Call:
## glm(formula = health ~ income + age + bmi, family = binomial,
## data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6177 0.2888 0.4797 0.6989 2.7354
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.970478 0.222836 13.330 < 2e-16 ***
## income$15,000-$20,000 0.013822 0.147340 0.094 0.925259
## income$20,000-$25,000 0.348754 0.143191 2.436 0.014868 *
## income$25,000-$35,000 0.756761 0.142530 5.309 1.10e-07 ***
## income$35,000-$50,000 0.916760 0.138779 6.606 3.95e-11 ***
## income$50,000-$75,000 1.362801 0.141859 9.607 < 2e-16 ***
## income$75,000 or more 1.988971 0.134283 14.812 < 2e-16 ***
## income77 0.553196 0.155868 3.549 0.000386 ***
## income99 1.141448 0.163930 6.963 3.33e-12 ***
## incomeless than $10,000 -0.195862 0.159850 -1.225 0.220467
## ageAge 25 to 29 -0.210833 0.212895 -0.990 0.322020
## ageAge 30 to 34 -0.046191 0.215954 -0.214 0.830632
## ageAge 35 to 39 -0.381852 0.206656 -1.848 0.064636 .
## ageAge 40 to 44 -0.492655 0.199803 -2.466 0.013674 *
## ageAge 45 to 49 -0.646379 0.196930 -3.282 0.001030 **
## ageAge 50 to 54 -0.736230 0.190261 -3.870 0.000109 ***
## ageAge 55 to 59 -0.970148 0.181131 -5.356 8.51e-08 ***
## ageAge 60 to 64 -1.304463 0.174932 -7.457 8.85e-14 ***
## ageAge 65 to 69 -0.902824 0.174249 -5.181 2.20e-07 ***
## ageAge 70 to 74 -1.037456 0.174496 -5.945 2.76e-09 ***
## ageAge 75 to 79 -0.916800 0.181514 -5.051 4.40e-07 ***
## ageAge 80 or older -1.125982 0.172053 -6.544 5.97e-11 ***
## bmi -0.061805 0.004437 -13.930 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8098.1 on 7874 degrees of freedom
## Residual deviance: 7094.1 on 7852 degrees of freedom
## (758 observations deleted due to missingness)
## AIC: 7140.1
##
## Number of Fisher Scoring iterations: 5
fit2<-glm(formula=health~income+age+bmi,weights = mmsawt,data=brfss_17)
summary(fit2)
##
## Call:
## glm(formula = health ~ income + age + bmi, data = brfss_17, weights = mmsawt)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -117.562 -0.090 1.498 5.277 86.718
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.922932 0.029474 31.313 < 2e-16 ***
## income$15,000-$20,000 0.130583 0.025937 5.035 4.89e-07 ***
## income$20,000-$25,000 0.110193 0.024407 4.515 6.43e-06 ***
## income$25,000-$35,000 0.278789 0.024732 11.272 < 2e-16 ***
## income$35,000-$50,000 0.291231 0.023576 12.353 < 2e-16 ***
## income$50,000-$75,000 0.330374 0.023741 13.916 < 2e-16 ***
## income$75,000 or more 0.371627 0.021710 17.118 < 2e-16 ***
## income77 0.175124 0.026068 6.718 1.97e-11 ***
## income99 0.286867 0.028429 10.091 < 2e-16 ***
## incomeless than $10,000 0.033259 0.027162 1.224 0.220822
## ageAge 25 to 29 -0.020885 0.018500 -1.129 0.258969
## ageAge 30 to 34 -0.007324 0.017475 -0.419 0.675168
## ageAge 35 to 39 -0.025257 0.018112 -1.394 0.163207
## ageAge 40 to 44 -0.031537 0.018467 -1.708 0.087719 .
## ageAge 45 to 49 -0.058278 0.018354 -3.175 0.001503 **
## ageAge 50 to 54 -0.067631 0.019123 -3.537 0.000407 ***
## ageAge 55 to 59 -0.114326 0.019121 -5.979 2.34e-09 ***
## ageAge 60 to 64 -0.117754 0.019475 -6.046 1.55e-09 ***
## ageAge 65 to 69 -0.120250 0.021073 -5.706 1.20e-08 ***
## ageAge 70 to 74 -0.118691 0.023289 -5.097 3.54e-07 ***
## ageAge 75 to 79 -0.120797 0.025629 -4.713 2.48e-06 ***
## ageAge 80 or older -0.226451 0.026704 -8.480 < 2e-16 ***
## bmi -0.011230 0.000681 -16.491 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 236.8947)
##
## Null deviance: 2174559 on 7874 degrees of freedom
## Residual deviance: 1860097 on 7852 degrees of freedom
## (758 observations deleted due to missingness)
## AIC: 19994
##
## Number of Fisher Scoring iterations: 2
fit3<-svyglm(formula=health~income+age+bmi,des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit3)
##
## Call:
## svyglm(formula = health ~ income + age + bmi, design = des, family = binomial)
##
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = brfss_17)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.781608 0.414369 6.713 2.04e-11 ***
## income$15,000-$20,000 0.546050 0.326634 1.672 0.094614 .
## income$20,000-$25,000 0.435268 0.308679 1.410 0.158550
## income$25,000-$35,000 1.407515 0.318182 4.424 9.84e-06 ***
## income$35,000-$50,000 1.498483 0.303838 4.932 8.31e-07 ***
## income$50,000-$75,000 1.853913 0.325855 5.689 1.32e-08 ***
## income$75,000 or more 2.327296 0.293635 7.926 2.58e-15 ***
## income77 0.733109 0.351361 2.086 0.036967 *
## income99 1.457847 0.350243 4.162 3.18e-05 ***
## incomeless than $10,000 0.042734 0.374783 0.114 0.909223
## ageAge 25 to 29 -0.176012 0.312889 -0.563 0.573765
## ageAge 30 to 34 0.005613 0.348279 0.016 0.987142
## ageAge 35 to 39 -0.174192 0.341177 -0.511 0.609672
## ageAge 40 to 44 -0.270353 0.326155 -0.829 0.407180
## ageAge 45 to 49 -0.463053 0.337013 -1.374 0.169483
## ageAge 50 to 54 -0.560202 0.327180 -1.712 0.086897 .
## ageAge 55 to 59 -0.882854 0.323893 -2.726 0.006430 **
## ageAge 60 to 64 -0.893925 0.309704 -2.886 0.003908 **
## ageAge 65 to 69 -0.920275 0.318765 -2.887 0.003900 **
## ageAge 70 to 74 -0.853756 0.339420 -2.515 0.011912 *
## ageAge 75 to 79 -0.882638 0.370364 -2.383 0.017188 *
## ageAge 80 or older -1.434560 0.420317 -3.413 0.000646 ***
## bmi -0.073589 0.010162 -7.242 4.85e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 0.9425965)
##
## Number of Fisher Scoring iterations: 5
When we add weights to our naive model income does not make such a big difference, increase in income does not improve health as much as in the first model. In the second model (with weights) the association between bmi and health also not so pronounced. When we incorporate weights and survey design in our model we see the following results. Age variable has significance only for older ages (age groupd from 60 to 69), inversely related with good health. Higher income is associated with good health, the result is statisticaly significant. As expected higher bmi is associated with bad health outcomes, the coefficient is also statistically significant.The last model shows the strongest association between bmi and bad health.
stargazer(fit1, fit2, fit3, style="demography", type="text",
column.labels = c("GLM", "Weights Only", "Survey"),
title = "Logistic regression models for health using survey data - BRFSS 2017",
covariate.labels=c("Income","Age 18 to 24",
"Age 25 to 29",
"Age 30 to 34",
"Age 35 to 39",
"Age 40 to 44",
"Age 45 to 49",
"Age 50 to 54",
"Age 55 to 59",
"Age 60 to 64",
"Age 65 to 69",
"Age 70 to 74",
"Age 75 to 79",
"Age 80 or older",
"Bmi"),
keep.stat="n", model.names=F, align=T, ci=T)
##
## Logistic regression models for health using survey data - BRFSS 2017
## ---------------------------------------------------------------------
## health
## GLM Weights Only Survey
## Model 1 Model 2 Model 3
## ---------------------------------------------------------------------
## Income 0.014 0.131*** 0.546
## (-0.275, 0.303) (0.080, 0.181) (-0.094, 1.186)
## Age 18 to 24 0.349* 0.110*** 0.435
## (0.068, 0.629) (0.062, 0.158) (-0.170, 1.040)
## Age 25 to 29 0.757*** 0.279*** 1.408***
## (0.477, 1.036) (0.230, 0.327) (0.784, 2.031)
## Age 30 to 34 0.917*** 0.291*** 1.498***
## (0.645, 1.189) (0.245, 0.337) (0.903, 2.094)
## Age 35 to 39 1.363*** 0.330*** 1.854***
## (1.085, 1.641) (0.284, 0.377) (1.215, 2.493)
## Age 40 to 44 1.989*** 0.372*** 2.327***
## (1.726, 2.252) (0.329, 0.414) (1.752, 2.903)
## Age 45 to 49 0.553*** 0.175*** 0.733*
## (0.248, 0.859) (0.124, 0.226) (0.044, 1.422)
## Age 50 to 54 1.141*** 0.287*** 1.458***
## (0.820, 1.463) (0.231, 0.343) (0.771, 2.144)
## Age 55 to 59 -0.196 0.033 0.043
## (-0.509, 0.117) (-0.020, 0.086) (-0.692, 0.777)
## Age 60 to 64 -0.211 -0.021 -0.176
## (-0.628, 0.206) (-0.057, 0.015) (-0.789, 0.437)
## Age 65 to 69 -0.046 -0.007 0.006
## (-0.469, 0.377) (-0.042, 0.027) (-0.677, 0.688)
## Age 70 to 74 -0.382 -0.025 -0.174
## (-0.787, 0.023) (-0.061, 0.010) (-0.843, 0.495)
## Age 75 to 79 -0.493* -0.032 -0.270
## (-0.884, -0.101) (-0.068, 0.005) (-0.910, 0.369)
## Age 80 or older -0.646** -0.058** -0.463
## (-1.032, -0.260) (-0.094, -0.022) (-1.124, 0.197)
## Bmi -0.736*** -0.068*** -0.560
## (-1.109, -0.363) (-0.105, -0.030) (-1.201, 0.081)
## ageAge 55 to 59 -0.970*** -0.114*** -0.883**
## (-1.325, -0.615) (-0.152, -0.077) (-1.518, -0.248)
## ageAge 60 to 64 -1.304*** -0.118*** -0.894**
## (-1.647, -0.962) (-0.156, -0.080) (-1.501, -0.287)
## ageAge 65 to 69 -0.903*** -0.120*** -0.920**
## (-1.244, -0.561) (-0.162, -0.079) (-1.545, -0.296)
## ageAge 70 to 74 -1.037*** -0.119*** -0.854*
## (-1.379, -0.695) (-0.164, -0.073) (-1.519, -0.189)
## ageAge 75 to 79 -0.917*** -0.121*** -0.883*
## (-1.273, -0.561) (-0.171, -0.071) (-1.609, -0.157)
## ageAge 80 or older -1.126*** -0.226*** -1.435***
## (-1.463, -0.789) (-0.279, -0.174) (-2.258, -0.611)
## bmi -0.062*** -0.011*** -0.074***
## (-0.071, -0.053) (-0.013, -0.010) (-0.094, -0.054)
## Constant 2.970*** 0.923*** 2.782***
## (2.534, 3.407) (0.865, 0.981) (1.969, 3.594)
## N 7,875 7,875 7,875
## ---------------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001