For this homework assignment, I will assess how income and race influence obesity status using BRFSS 2017 data. I predict that non-Hispanic Whites (NHW) will have the lowest prevalence of obesity and that those in the highest income bracket will also have lower rates of obesity.

#load required packages
library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tableone)

# Load the 2017 BRFSS data. The .Rdata file is expected to provide the
# `brfss_17` data frame used below.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
load("C:/Users/xln973/Google Drive/class_20_7283/data/brfss_2017.Rdata")

# Select cases: flag rows whose `mmsaname` contains "TX" (tx = 1, otherwise NA),
# then keep the flagged rows that have a non-missing `mmsawt`.
# `!is.na()` replaces `is.na() == F`: `F` is an ordinary variable that can be
# reassigned, and negation states the intent directly.
brfss_17$tx <- ifelse(grepl(pattern = "TX", brfss_17$mmsaname), 1, NA)
brfsstx <- brfss_17 %>%
  filter(tx == 1, !is.na(mmsawt))

##Recode Variables

# Recode the analysis variables on the Texas subset: obesity indicator,
# race/ethnicity factor, and income factor.

# BMI and obesity indicator
# bmi5 is divided by 100 to obtain BMI (presumably bmi5 carries two implied
# decimal places -- confirm against the codebook). obese is 1 when BMI >= 30,
# 0 otherwise, and NA when BMI is missing.
brfsstx$bmi <- brfsstx$bmi5 / 100
brfsstx$obese <- ifelse(brfsstx$bmi >= 30, 1, 0)
table(brfsstx$obese)
## 
##    0    1 
## 5425 2493

# Race/ethnicity: collapse racegr3 into four groups (code 9 becomes NA) and
# make NHWhite the reference level.
# `as.factor = TRUE` replaces `as.factor = T`; `T` is a reassignable variable.
brfsstx$racegr4 <- Recode(
  brfsstx$racegr3,
  recodes = "1='NHWhite'; 2='NHBlack'; 3:4='NHOther'; 5='Hispanic'; 9=NA",
  as.factor = TRUE
)
brfsstx$racegr4 <- relevel(brfsstx$racegr4, ref = "NHWhite")
table(brfsstx$racegr4)
## 
##  NHWhite Hispanic  NHBlack  NHOther 
##     5234     1989      761      441

# Income: label the incomg categories (code 9 becomes NA) and make the
# highest bracket, '50k+', the reference level.
# NOTE(review): labels such as '15k-24,999k' presumably mean $15,000-$24,999;
# they are kept unchanged because the printed tables use them verbatim.
brfsstx$income <- Recode(
  brfsstx$incomg,
  recodes = "1='<15k'; 2='15k-24,999k'; 3='25k-34,999k'; 4='35k-49,999k'; 5='50k+'; 9=NA",
  as.factor = TRUE
)
brfsstx$income <- relevel(brfsstx$income, ref = "50k+")
table(brfsstx$income)
## 
##        50k+        <15k 15k-24,999k 25k-34,999k 35k-49,999k 
##        3679         783        1190         769         953

##Descriptive Statistics

# Unweighted cross-tabulations of obesity status (rows) against
# race/ethnicity and income (columns), with column percentages and
# Pearson chi-squared tests.

# Obesity by race/ethnicity: counts
tab_obese_race <- table(brfsstx$obese, brfsstx$racegr4)
tab_obese_race
##    
##     NHWhite Hispanic NHBlack NHOther
##   0    3491     1090     419     307
##   1    1421      650     280      92
# Column percentages (each race/ethnicity column sums to 100)
100 * prop.table(tab_obese_race, margin = 2)
##    
##      NHWhite Hispanic  NHBlack  NHOther
##   0 71.07085 62.64368 59.94278 76.94236
##   1 28.92915 37.35632 40.05722 23.05764
# Chi-squared test of independence; the table is built inline so the
# "data:" line of the printed result names the original expression.
chisq.test(table(brfsstx$obese, brfsstx$racegr4))
## 
##  Pearson's Chi-squared test
## 
## data:  table(brfsstx$obese, brfsstx$racegr4)
## X-squared = 79.571, df = 3, p-value < 2.2e-16

# Obesity by income: counts
tab_obese_income <- table(brfsstx$obese, brfsstx$income)
tab_obese_income
##    
##     50k+ <15k 15k-24,999k 25k-34,999k 35k-49,999k
##   0 2456  443         708         484         610
##   1 1040  262         402         249         295
# Column percentages (each income column sums to 100)
100 * prop.table(tab_obese_income, margin = 2)
##    
##         50k+     <15k 15k-24,999k 25k-34,999k 35k-49,999k
##   0 70.25172 62.83688    63.78378    66.03001    67.40331
##   1 29.74828 37.16312    36.21622    33.96999    32.59669
# Chi-squared test of independence (table built inline, as above)
chisq.test(table(brfsstx$obese, brfsstx$income))
## 
##  Pearson's Chi-squared test
## 
## data:  table(brfsstx$obese, brfsstx$income)
## X-squared = 26.761, df = 4, p-value = 2.221e-05

##Weighted descriptive analysis

# Weighted column percentages of obesity status by race/ethnicity and by
# income, using mmsawt as the weight, followed by an unweighted
# "Table 1" for comparison.
# The weights argument of wtd.table() is spelled out as `weights`; the
# original `weight =` only worked through partial argument matching.

# Obesity by race/ethnicity (weighted)
prop.table(
  wtd.table(brfsstx$obese, brfsstx$racegr4, weights = brfsstx$mmsawt),
  margin = 2
) * 100
##    NHWhite Hispanic  NHBlack  NHOther
## 0 70.60288 64.61005 64.62036 81.55063
## 1 29.39712 35.38995 35.37964 18.44937

# Obesity by income (weighted)
prop.table(
  wtd.table(brfsstx$obese, brfsstx$income, weights = brfsstx$mmsawt),
  margin = 2
) * 100
##       50k+     <15k 15k-24,999k 25k-34,999k 35k-49,999k
## 0 70.72745 65.23112    63.88850    65.02412    71.39462
## 1 29.27255 34.76888    36.11150    34.97588    28.60538

# Table 1 with no survey design and no weights: distribution of
# race/ethnicity and income within each obesity stratum, with tests.
# `test = TRUE` replaces `test = T`; `T` is a reassignable variable.
t1 <- CreateTableOne(
  vars = c("racegr4", "income"),
  strata = "obese",
  test = TRUE,
  data = brfsstx
)
print(t1, format = "p")
##                 Stratified by obese
##                  0    1    p      test
##   n              5425 2493            
##   racegr4 (%)              <0.001     
##      NHWhite     65.8 58.2            
##      Hispanic    20.5 26.6            
##      NHBlack      7.9 11.5            
##      NHOther      5.8  3.8            
##   income (%)               <0.001     
##      50k+        52.2 46.3            
##      <15k         9.4 11.7            
##      15k-24,999k 15.1 17.9            
##      25k-34,999k 10.3 11.1            
##      35k-49,999k 13.0 13.1

##Survey Design

# Declare the survey design and build the design-based "Table 1".

# Use the "adjust" handling for strata that contain a single PSU.
options(survey.lonely.psu = "adjust")

# Design object: no cluster ids (ids = ~1), stratified by ststr,
# weighted by mmsawt.
des <- svydesign(
  ids = ~1,
  strata = ~ststr,
  weights = ~mmsawt,
  data = brfsstx
)

# Design-based distribution of race/ethnicity and income within each obesity
# stratum, with tests.
# `test = TRUE` replaces `test = T`; `T` is a reassignable variable.
st1 <- svyCreateTableOne(
  vars = c("racegr4", "income"),
  strata = "obese",
  test = TRUE,
  data = des
)
print(st1, format = "p")
##                 Stratified by obese
##                  0         1         p      test
##   n              9488033.4 4305759.1            
##   racegr4 (%)                         0.002     
##      NHWhite          47.1      42.9            
##      Hispanic         31.4      37.6            
##      NHBlack          12.6      15.1            
##      NHOther           8.8       4.3            
##   income (%)                          0.157     
##      50k+             49.2      44.1            
##      <15k             10.5      12.1            
##      15k-24,999k      16.8      20.6            
##      25k-34,999k       9.3      10.9            
##      35k-49,999k      14.1      12.3

When weights and survey design are taken into consideration, there are minor differences in the proportions of obesity by race and income. Most notable are the differences in obesity prevalence among non-Hispanic Blacks and non-Hispanic Other. Obesity prevalence in these groups is overestimated in the unweighted analysis (40.1% unweighted vs. 35.4% weighted for non-Hispanic Blacks; 23.1% vs. 18.4% for non-Hispanic Other).

When survey design and weights are considered, the share of the obese population that is non-Hispanic White decreases (58.2% to 42.9%), while the shares of all other race/ethnicity categories increase.

Similar results are seen for income. Those who make $50k+ represent 44% of the obese population in Texas when survey design and weights are taken into consideration, as opposed to 46% in the unweighted analysis.

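The p-values for the weighted analysis are also larger than those for the unweighted analysis: the association with race remains significant (p = 0.002, versus p < 0.001 unweighted), while the association with income is no longer statistically significant (p = 0.157, versus p < 0.001 unweighted).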