Data

I use 2016 CDC Behavioral Risk Factor Surveillance System (BRFSS) SMART metro area survey data for this research question.

Dependent variable

  1. The binary outcome variable is health, we recode categorical variable into two outcomes: 1 - good health (excellent, very good, good), 0 - bad health (fair, poor)
  2. There is a number of variables that are associated with health. We would like to see the association between bmi (predictor) and health.
brfss_17$health<-Recode(brfss_17$genhlth, recodes="1:3=1;4:5=0;else=NA")

Predictors

  1. Key independent variable is bmi, but other predictors include income and age (control variable).

income

brfss_17$income<-Recode(brfss_17$income2,recodes="77=NA;99=NA")
brfss_17$income<-Recode(brfss_17$income2, recodes="1='less than $10,000';
                                                   2='$10,000-$15,000';
                                                   3='$15,000-$20,000';
                                                   4='$20,000-$25,000';
                                                   5='$25,000-$35,000';
                                                   6='$35,000-$50,000';
                                                   7='$50,000-$75,000';
                                                   8='$75,000 or more'", as.factor=TRUE)

age

brfss_17$age<-Recode(brfss_17$age80,recodes="18:24='Age 18 to 24';
                                             25:29='Age 25 to 29';
                                             30:34='Age 30 to 34';
                                             35:39='Age 35 to 39';
                                             40:44='Age 40 to 44';
                                             45:49='Age 45 to 49';
                                             50:54='Age 50 to 54';
                                             55:59='Age 55 to 59';
                                             60:64='Age 60 to 64';
                                             65:69='Age 65 to 69';
                                             70:74='Age 70 to 74';
                                             75:79='Age 75 to 79';
                                             80:99='Age 80 or older'",as.factor=TRUE)

bmi

brfss_17$bmi<-brfss_17$bmi5/100

Descriptive analysis

  1. income

table(brfss_17$health,brfss_17$income)
##    
##     $10,000-$15,000 $15,000-$20,000 $20,000-$25,000 $25,000-$35,000
##   0            3671            4333            4430            4102
##   1            4650            8321           11431           14377
##    
##     $35,000-$50,000 $50,000-$75,000 $75,000 or more    77    99
##   0            4337            3570            4777  4392  3147
##   1           21440           26445           67161 12071 17787
##    
##     less than $10,000
##   0              3658
##   1              4490
prop.table(table(brfss_17$health,brfss_17$income),margin=2)
##    
##     $10,000-$15,000 $15,000-$20,000 $20,000-$25,000 $25,000-$35,000
##   0       0.4411729       0.3424214       0.2793014       0.2219817
##   1       0.5588271       0.6575786       0.7206986       0.7780183
##    
##     $35,000-$50,000 $50,000-$75,000 $75,000 or more        77        99
##   0       0.1682508       0.1189405       0.0664044 0.2667801 0.1503296
##   1       0.8317492       0.8810595       0.9335956 0.7332199 0.8496704
##    
##     less than $10,000
##   0         0.4489445
##   1         0.5510555
chisq.test(table(brfss_17$health,brfss_17$income))
## 
##  Pearson's Chi-squared test
## 
## data:  table(brfss_17$health, brfss_17$income)
## X-squared = 19674, df = 9, p-value < 2.2e-16

Tables demonstrate that with increase in income the proportion of residents who report good health is growing and the proportion of those reporting bad health is decreasing. According to Pearson’s Chi-squared test results there is a relationsip between income and age. # age

table(brfss_17$health,brfss_17$age)
##    
##     Age 18 to 24 Age 25 to 29 Age 30 to 34 Age 35 to 39 Age 40 to 44
##   0         1288         1229         1422         1775         1900
##   1        13119        11198        12011        12542        11904
##    
##     Age 45 to 49 Age 50 to 54 Age 55 to 59 Age 60 to 64 Age 65 to 69
##   0         2555         3646         4741         5317         4972
##   1        14093        16217        17950        19668        20333
##    
##     Age 70 to 74 Age 75 to 79 Age 80 or older
##   0         4138         3190            4554
##   1        16283        11012           13231
prop.table(table(brfss_17$health,brfss_17$age),margin=2)
##    
##     Age 18 to 24 Age 25 to 29 Age 30 to 34 Age 35 to 39 Age 40 to 44
##   0   0.08940099   0.09889756   0.10585871   0.12397849   0.13764126
##   1   0.91059901   0.90110244   0.89414129   0.87602151   0.86235874
##    
##     Age 45 to 49 Age 50 to 54 Age 55 to 59 Age 60 to 64 Age 65 to 69
##   0   0.15347189   0.18355737   0.20893746   0.21280768   0.19648291
##   1   0.84652811   0.81644263   0.79106254   0.78719232   0.80351709
##    
##     Age 70 to 74 Age 75 to 79 Age 80 or older
##   0   0.20263454   0.22461625      0.25605848
##   1   0.79736546   0.77538375      0.74394152
chisq.test(table(brfss_17$health,brfss_17$age))
## 
##  Pearson's Chi-squared test
## 
## data:  table(brfss_17$health, brfss_17$age)
## X-squared = 3761.7, df = 12, p-value < 2.2e-16

According to Pearson’s Chi-squared test results there is a relationsip between health and age

Analysis using weights

income

prop.table(wtd.table(brfss_17$health,brfss_17$income, weights = brfss_17$mmsawt),margin=2)
##   $10,000-$15,000 $15,000-$20,000 $20,000-$25,000 $25,000-$35,000
## 0      0.40520728      0.30701362      0.26750414      0.21350408
## 1      0.59479272      0.69298638      0.73249586      0.78649592
##   $35,000-$50,000 $50,000-$75,000 $75,000 or more         77         99
## 0      0.15613249      0.11971591      0.06694272 0.24950429 0.15037923
## 1      0.84386751      0.88028409      0.93305728 0.75049571 0.84962077
##   less than $10,000
## 0        0.39943802
## 1        0.60056198

Compared to the previous case (with no weights) we can see that if we account for weights income increase in income for lower and middle income groups is associated with larger health disparities.

Survey design object

Texas residents

brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1
brfss_17<-brfss_17%>%
  filter(tx==1, is.na(mmsawt)==F)

options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17 )

Logistic regression

without weights

fit1<-glm(formula=health~income+age+bmi,data=brfss_17,family=binomial)
summary(fit1)
## 
## Call:
## glm(formula = health ~ income + age + bmi, family = binomial, 
##     data = brfss_17)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6177   0.2888   0.4797   0.6989   2.7354  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              2.970478   0.222836  13.330  < 2e-16 ***
## income$15,000-$20,000    0.013822   0.147340   0.094 0.925259    
## income$20,000-$25,000    0.348754   0.143191   2.436 0.014868 *  
## income$25,000-$35,000    0.756761   0.142530   5.309 1.10e-07 ***
## income$35,000-$50,000    0.916760   0.138779   6.606 3.95e-11 ***
## income$50,000-$75,000    1.362801   0.141859   9.607  < 2e-16 ***
## income$75,000 or more    1.988971   0.134283  14.812  < 2e-16 ***
## income77                 0.553196   0.155868   3.549 0.000386 ***
## income99                 1.141448   0.163930   6.963 3.33e-12 ***
## incomeless than $10,000 -0.195862   0.159850  -1.225 0.220467    
## ageAge 25 to 29         -0.210833   0.212895  -0.990 0.322020    
## ageAge 30 to 34         -0.046191   0.215954  -0.214 0.830632    
## ageAge 35 to 39         -0.381852   0.206656  -1.848 0.064636 .  
## ageAge 40 to 44         -0.492655   0.199803  -2.466 0.013674 *  
## ageAge 45 to 49         -0.646379   0.196930  -3.282 0.001030 ** 
## ageAge 50 to 54         -0.736230   0.190261  -3.870 0.000109 ***
## ageAge 55 to 59         -0.970148   0.181131  -5.356 8.51e-08 ***
## ageAge 60 to 64         -1.304463   0.174932  -7.457 8.85e-14 ***
## ageAge 65 to 69         -0.902824   0.174249  -5.181 2.20e-07 ***
## ageAge 70 to 74         -1.037456   0.174496  -5.945 2.76e-09 ***
## ageAge 75 to 79         -0.916800   0.181514  -5.051 4.40e-07 ***
## ageAge 80 or older      -1.125982   0.172053  -6.544 5.97e-11 ***
## bmi                     -0.061805   0.004437 -13.930  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8098.1  on 7874  degrees of freedom
## Residual deviance: 7094.1  on 7852  degrees of freedom
##   (758 observations deleted due to missingness)
## AIC: 7140.1
## 
## Number of Fisher Scoring iterations: 5

with weights

fit2<-glm(formula=health~income+age+bmi,weights = mmsawt,data=brfss_17)
summary(fit2)
## 
## Call:
## glm(formula = health ~ income + age + bmi, data = brfss_17, weights = mmsawt)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -117.562    -0.090     1.498     5.277    86.718  
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.922932   0.029474  31.313  < 2e-16 ***
## income$15,000-$20,000    0.130583   0.025937   5.035 4.89e-07 ***
## income$20,000-$25,000    0.110193   0.024407   4.515 6.43e-06 ***
## income$25,000-$35,000    0.278789   0.024732  11.272  < 2e-16 ***
## income$35,000-$50,000    0.291231   0.023576  12.353  < 2e-16 ***
## income$50,000-$75,000    0.330374   0.023741  13.916  < 2e-16 ***
## income$75,000 or more    0.371627   0.021710  17.118  < 2e-16 ***
## income77                 0.175124   0.026068   6.718 1.97e-11 ***
## income99                 0.286867   0.028429  10.091  < 2e-16 ***
## incomeless than $10,000  0.033259   0.027162   1.224 0.220822    
## ageAge 25 to 29         -0.020885   0.018500  -1.129 0.258969    
## ageAge 30 to 34         -0.007324   0.017475  -0.419 0.675168    
## ageAge 35 to 39         -0.025257   0.018112  -1.394 0.163207    
## ageAge 40 to 44         -0.031537   0.018467  -1.708 0.087719 .  
## ageAge 45 to 49         -0.058278   0.018354  -3.175 0.001503 ** 
## ageAge 50 to 54         -0.067631   0.019123  -3.537 0.000407 ***
## ageAge 55 to 59         -0.114326   0.019121  -5.979 2.34e-09 ***
## ageAge 60 to 64         -0.117754   0.019475  -6.046 1.55e-09 ***
## ageAge 65 to 69         -0.120250   0.021073  -5.706 1.20e-08 ***
## ageAge 70 to 74         -0.118691   0.023289  -5.097 3.54e-07 ***
## ageAge 75 to 79         -0.120797   0.025629  -4.713 2.48e-06 ***
## ageAge 80 or older      -0.226451   0.026704  -8.480  < 2e-16 ***
## bmi                     -0.011230   0.000681 -16.491  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 236.8947)
## 
##     Null deviance: 2174559  on 7874  degrees of freedom
## Residual deviance: 1860097  on 7852  degrees of freedom
##   (758 observations deleted due to missingness)
## AIC: 19994
## 
## Number of Fisher Scoring iterations: 2

incorporating design effects

fit3<-svyglm(formula=health~income+age+bmi,des,family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
summary(fit3)
## 
## Call:
## svyglm(formula = health ~ income + age + bmi, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = brfss_17)
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              2.781608   0.414369   6.713 2.04e-11 ***
## income$15,000-$20,000    0.546050   0.326634   1.672 0.094614 .  
## income$20,000-$25,000    0.435268   0.308679   1.410 0.158550    
## income$25,000-$35,000    1.407515   0.318182   4.424 9.84e-06 ***
## income$35,000-$50,000    1.498483   0.303838   4.932 8.31e-07 ***
## income$50,000-$75,000    1.853913   0.325855   5.689 1.32e-08 ***
## income$75,000 or more    2.327296   0.293635   7.926 2.58e-15 ***
## income77                 0.733109   0.351361   2.086 0.036967 *  
## income99                 1.457847   0.350243   4.162 3.18e-05 ***
## incomeless than $10,000  0.042734   0.374783   0.114 0.909223    
## ageAge 25 to 29         -0.176012   0.312889  -0.563 0.573765    
## ageAge 30 to 34          0.005613   0.348279   0.016 0.987142    
## ageAge 35 to 39         -0.174192   0.341177  -0.511 0.609672    
## ageAge 40 to 44         -0.270353   0.326155  -0.829 0.407180    
## ageAge 45 to 49         -0.463053   0.337013  -1.374 0.169483    
## ageAge 50 to 54         -0.560202   0.327180  -1.712 0.086897 .  
## ageAge 55 to 59         -0.882854   0.323893  -2.726 0.006430 ** 
## ageAge 60 to 64         -0.893925   0.309704  -2.886 0.003908 ** 
## ageAge 65 to 69         -0.920275   0.318765  -2.887 0.003900 ** 
## ageAge 70 to 74         -0.853756   0.339420  -2.515 0.011912 *  
## ageAge 75 to 79         -0.882638   0.370364  -2.383 0.017188 *  
## ageAge 80 or older      -1.434560   0.420317  -3.413 0.000646 ***
## bmi                     -0.073589   0.010162  -7.242 4.85e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.9425965)
## 
## Number of Fisher Scoring iterations: 5

Interpretation of the results

When we add weights to our naive model income does not make such a big difference, increase in income does not improve health as much as in the first model. In the second model (with weights) the association between bmi and health also not so pronounced. When we incorporate weights and survey design in our model we see the following results. Age variable has significance only for older ages (age groupd from 60 to 69), inversely related with good health. Higher income is associated with good health, the result is statisticaly significant. As expected higher bmi is associated with bad health outcomes, the coefficient is also statistically significant.The last model shows the strongest association between bmi and bad health.

Comparing the results of three models

stargazer(fit1, fit2, fit3, style="demography", type="text",
          column.labels = c("GLM", "Weights Only", "Survey"),
          title = "Logistic regression models for health using survey data - BRFSS 2017", 
          covariate.labels=c("Income","Age 18 to 24",
                                      "Age 25 to 29",
                                      "Age 30 to 34",
                                      "Age 35 to 39",
                                      "Age 40 to 44",
                                      "Age 45 to 49",
                                      "Age 50 to 54",
                                      "Age 55 to 59",
                                      "Age 60 to 64",
                                      "Age 65 to 69",
                                      "Age 70 to 74",
                                      "Age 75 to 79",
                                      "Age 80 or older",
                                      "Bmi"), 
          keep.stat="n", model.names=F, align=T, ci=T)
## 
## Logistic regression models for health using survey data - BRFSS 2017
## ---------------------------------------------------------------------
##                                          health                      
##                          GLM          Weights Only        Survey     
##                        Model 1          Model 2          Model 3     
## ---------------------------------------------------------------------
## Income                  0.014           0.131***          0.546      
##                    (-0.275, 0.303)   (0.080, 0.181)  (-0.094, 1.186) 
## Age 18 to 24            0.349*          0.110***          0.435      
##                     (0.068, 0.629)   (0.062, 0.158)  (-0.170, 1.040) 
## Age 25 to 29           0.757***         0.279***         1.408***    
##                     (0.477, 1.036)   (0.230, 0.327)   (0.784, 2.031) 
## Age 30 to 34           0.917***         0.291***         1.498***    
##                     (0.645, 1.189)   (0.245, 0.337)   (0.903, 2.094) 
## Age 35 to 39           1.363***         0.330***         1.854***    
##                     (1.085, 1.641)   (0.284, 0.377)   (1.215, 2.493) 
## Age 40 to 44           1.989***         0.372***         2.327***    
##                     (1.726, 2.252)   (0.329, 0.414)   (1.752, 2.903) 
## Age 45 to 49           0.553***         0.175***          0.733*     
##                     (0.248, 0.859)   (0.124, 0.226)   (0.044, 1.422) 
## Age 50 to 54           1.141***         0.287***         1.458***    
##                     (0.820, 1.463)   (0.231, 0.343)   (0.771, 2.144) 
## Age 55 to 59            -0.196           0.033            0.043      
##                    (-0.509, 0.117)  (-0.020, 0.086)  (-0.692, 0.777) 
## Age 60 to 64            -0.211           -0.021           -0.176     
##                    (-0.628, 0.206)  (-0.057, 0.015)  (-0.789, 0.437) 
## Age 65 to 69            -0.046           -0.007           0.006      
##                    (-0.469, 0.377)  (-0.042, 0.027)  (-0.677, 0.688) 
## Age 70 to 74            -0.382           -0.025           -0.174     
##                    (-0.787, 0.023)  (-0.061, 0.010)  (-0.843, 0.495) 
## Age 75 to 79           -0.493*           -0.032           -0.270     
##                    (-0.884, -0.101) (-0.068, 0.005)  (-0.910, 0.369) 
## Age 80 or older        -0.646**         -0.058**          -0.463     
##                    (-1.032, -0.260) (-0.094, -0.022) (-1.124, 0.197) 
## Bmi                   -0.736***        -0.068***          -0.560     
##                    (-1.109, -0.363) (-0.105, -0.030) (-1.201, 0.081) 
## ageAge 55 to 59       -0.970***        -0.114***         -0.883**    
##                    (-1.325, -0.615) (-0.152, -0.077) (-1.518, -0.248)
## ageAge 60 to 64       -1.304***        -0.118***         -0.894**    
##                    (-1.647, -0.962) (-0.156, -0.080) (-1.501, -0.287)
## ageAge 65 to 69       -0.903***        -0.120***         -0.920**    
##                    (-1.244, -0.561) (-0.162, -0.079) (-1.545, -0.296)
## ageAge 70 to 74       -1.037***        -0.119***         -0.854*     
##                    (-1.379, -0.695) (-0.164, -0.073) (-1.519, -0.189)
## ageAge 75 to 79       -0.917***        -0.121***         -0.883*     
##                    (-1.273, -0.561) (-0.171, -0.071) (-1.609, -0.157)
## ageAge 80 or older    -1.126***        -0.226***        -1.435***    
##                    (-1.463, -0.789) (-0.279, -0.174) (-2.258, -0.611)
## bmi                   -0.062***        -0.011***        -0.074***    
##                    (-0.071, -0.053) (-0.013, -0.010) (-0.094, -0.054)
## Constant               2.970***         0.923***         2.782***    
##                     (2.534, 3.407)   (0.865, 0.981)   (1.969, 3.594) 
## N                       7,875            7,875            7,875      
## ---------------------------------------------------------------------
## *p < .05; **p < .01; ***p < .001