For this homework assignment, I will assess how income and race influence obesity status using BRFSS 2017 data. I predict that non-Hispanic Whites (NHW) will have the lowest prevalence of obesity and that those in the highest income brackets will also have lower rates of obesity.
#load required packages
library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tableone)
# load brfss data
# load() restores the objects saved in the .Rdata file; the brfss_17 data
# frame used below is expected to come from it.
# NOTE(review): absolute, machine-specific path -- confirm before running
# this script on another machine.
load("C:/Users/xln973/Google Drive/class_20_7283/data/brfss_2017.Rdata")
# select cases
# Flag rows whose MMSA name contains "TX" (1 = match, NA = everything else).
brfss_17$tx <- NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)] <- 1
# Keep flagged rows that also have a non-missing mmsawt. filter() drops rows
# where a condition evaluates to NA, so unflagged rows are excluded.
brfsstx <- brfss_17 %>%
  filter(tx == 1, !is.na(mmsawt))
##Recode Variables
# recode variables
# bmi: bmi5 divided by 100; obese: 1 when bmi >= 30, 0 otherwise, and NA
# wherever bmi is missing
brfsstx <- brfsstx %>%
  mutate(
    bmi = bmi5 / 100,
    obese = ifelse(bmi >= 30, 1, 0)
  )
table(brfsstx[["obese"]])
##
## 0 1
## 5425 2493
# race level
# Collapse the racegr3 codes into four labelled groups (codes 3 and 4 are
# merged into NHOther; code 9 becomes NA) and return the result as a factor.
brfsstx$racegr4 <- Recode(
  brfsstx$racegr3,
  recodes = "1='NHWhite'; 2='NHBlack'; 3:4='NHOther'; 5='Hispanic'; 9=NA",
  as.factor = TRUE
)
# Make NHWhite the reference (first) level.
brfsstx$racegr4 <- relevel(brfsstx$racegr4, ref = "NHWhite")
table(brfsstx$racegr4)
##
## NHWhite Hispanic NHBlack NHOther
## 5234 1989 761 441
# income
# Label the incomg codes 1-5 as income brackets; code 9 becomes NA.
# NOTE(review): labels such as '15k-24,999k' combine a thousands separator
# with a "k" suffix; they are left unchanged because the printed tables in
# this report use these exact level names.
brfsstx$income <- Recode(
  brfsstx$incomg,
  recodes = "1='<15k'; 2='15k-24,999k'; 3='25k-34,999k'; 4='35k-49,999k'; 5='50k+'; 9=NA",
  as.factor = TRUE
)
# Make the top bracket the reference (first) level.
brfsstx$income <- relevel(brfsstx$income, ref = "50k+")
table(brfsstx$income)
##
## 50k+ <15k 15k-24,999k 25k-34,999k 35k-49,999k
## 3679 783 1190 769 953
##Descriptive Statistics
# unweighted counts: obesity status (rows) by race/ethnicity (columns)
table(brfsstx[["obese"]], brfsstx[["racegr4"]])
##
## NHWhite Hispanic NHBlack NHOther
## 0 3491 1090 419 307
## 1 1421 650 280 92
# unweighted column percentages: each race/ethnicity column sums to 100
100 * prop.table(table(brfsstx[["obese"]], brfsstx[["racegr4"]]), margin = 2)
##
## NHWhite Hispanic NHBlack NHOther
## 0 71.07085 62.64368 59.94278 76.94236
## 1 28.92915 37.35632 40.05722 23.05764
#chi square
# Pearson chi-squared test on the unweighted obesity-by-race/ethnicity counts.
# The call is kept exactly as written because the printed "data:" line of the
# test result echoes the expression passed in.
chisq.test(table(brfsstx$obese, brfsstx$racegr4))
##
## Pearson's Chi-squared test
##
## data: table(brfsstx$obese, brfsstx$racegr4)
## X-squared = 79.571, df = 3, p-value < 2.2e-16
# unweighted counts: obesity status (rows) by income bracket (columns)
table(brfsstx[["obese"]], brfsstx[["income"]])
##
## 50k+ <15k 15k-24,999k 25k-34,999k 35k-49,999k
## 0 2456 443 708 484 610
## 1 1040 262 402 249 295
# unweighted column percentages: each income column sums to 100
100 * prop.table(table(brfsstx[["obese"]], brfsstx[["income"]]), margin = 2)
##
## 50k+ <15k 15k-24,999k 25k-34,999k 35k-49,999k
## 0 70.25172 62.83688 63.78378 66.03001 67.40331
## 1 29.74828 37.16312 36.21622 33.96999 32.59669
#chi square
# Pearson chi-squared test on the unweighted obesity-by-income counts.
# The call is kept exactly as written because the printed "data:" line of the
# test result echoes the expression passed in.
chisq.test(table(brfsstx$obese, brfsstx$income))
##
## Pearson's Chi-squared test
##
## data: table(brfsstx$obese, brfsstx$income)
## X-squared = 26.761, df = 4, p-value = 2.221e-05
##Weighted descriptive analysis
# race and obese
# Weighted column percentages of obesity status within each race/ethnicity
# group, weighting each respondent by mmsawt. The argument is spelled out as
# `weights` (the formal name in wtd.table()) instead of relying on partial
# argument matching of `weight`.
prop.table(
  wtd.table(brfsstx$obese, brfsstx$racegr4, weights = brfsstx$mmsawt),
  margin = 2
) * 100
## NHWhite Hispanic NHBlack NHOther
## 0 70.60288 64.61005 64.62036 81.55063
## 1 29.39712 35.38995 35.37964 18.44937
# income and obese
# Weighted column percentages of obesity status within each income bracket,
# weighting each respondent by mmsawt. The argument is spelled out as
# `weights` (the formal name in wtd.table()) instead of relying on partial
# argument matching of `weight`.
prop.table(
  wtd.table(brfsstx$obese, brfsstx$income, weights = brfsstx$mmsawt),
  margin = 2
) * 100
## 50k+ <15k 15k-24,999k 25k-34,999k 35k-49,999k
## 0 70.72745 65.23112 63.88850 65.02412 71.39462
## 1 29.27255 34.76888 36.11150 34.97588 28.60538
# no survey design, no weights
# Unweighted table of race/ethnicity and income, stratified by obesity status,
# with group-comparison tests requested.
t1 <- CreateTableOne(
  vars = c("racegr4", "income"),
  strata = "obese",
  test = TRUE,
  data = brfsstx
)
# format = "p" prints categorical variables as percentages only.
print(t1, format = "p")
## Stratified by obese
## 0 1 p test
## n 5425 2493
## racegr4 (%) <0.001
## NHWhite 65.8 58.2
## Hispanic 20.5 26.6
## NHBlack 7.9 11.5
## NHOther 5.8 3.8
## income (%) <0.001
## 50k+ 52.2 46.3
## <15k 9.4 11.7
## 15k-24,999k 15.1 17.9
## 25k-34,999k 10.3 11.1
## 35k-49,999k 13.0 13.1
##Survey Design
# Have the survey package "adjust" for strata that contain a single PSU
# rather than stopping with an error when variances are computed.
options(survey.lonely.psu = "adjust")
# Design object: no cluster ids (ids = ~1), stratified by ststr, weighted by
# mmsawt.
des <- svydesign(ids = ~1, strata = ~ststr, weights = ~mmsawt, data = brfsstx)
# Survey-weighted table of race/ethnicity and income by obesity status, with
# design-based tests requested.
st1 <- svyCreateTableOne(
  vars = c("racegr4", "income"),
  strata = "obese",
  test = TRUE,
  data = des
)
# format = "p" prints categorical variables as percentages only.
print(st1, format = "p")
## Stratified by obese
## 0 1 p test
## n 9488033.4 4305759.1
## racegr4 (%) 0.002
## NHWhite 47.1 42.9
## Hispanic 31.4 37.6
## NHBlack 12.6 15.1
## NHOther 8.8 4.3
## income (%) 0.157
## 50k+ 49.2 44.1
## <15k 10.5 12.1
## 15k-24,999k 16.8 20.6
## 25k-34,999k 9.3 10.9
## 35k-49,999k 14.1 12.3
When weights and survey design are taken into consideration, there are minor differences in the proportions of obesity by race and income. Most notable are the differences in obesity prevalence among Non-Hispanic Blacks and Non-Hispanic Others. The prevalence of obesity in these populations is overstated in the unweighted analysis (40.1% unweighted vs. 35.4% weighted for Non-Hispanic Blacks, and 23.1% vs. 18.4% for Non-Hispanic Others).
When survey design and weights are considered, the share of the obese population that is Non-Hispanic White decreases (from 58.2% to 42.9%), while the shares of all other race/ethnicity categories increase.
Similar results are seen for income. Those who make $50k+ represent 44% of the obese population in Texas when survey design and weights are taken into consideration, as opposed to 46% in the unweighted analysis.
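The p-values for the weighted analysis are also larger than those for the unweighted analysis: for race, p rises from <0.001 to 0.002, and for income, p rises from <0.001 to 0.157, so the association between income and obesity is no longer statistically significant at the 0.05 level once the survey design is taken into account.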