load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
nams<-names(brfss_17)
head(nams, n=10)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth" "physhlth"
## [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(brfss_17)<-newnames
##Get TX Cities
brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1
##Remove Non-Responses
brfss_17<-brfss_17%>%
filter(tx==1, is.na(mmsawt)==F, is.na(hlthpln1)==F)
# 1. Define a binary outcome variable of your choosing and define how you recode the original variable.
##I will use the variable hlthpln1 as my binary variable. The question is: Do you have any kind of health care coverage, including health insurance, prepaid plans such as HMOs, or government plans such as Medicare,or Indian Health Service?
#I used your code to recode the original variable.
brfss_17$hlthpln1<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
View(brfss_17)
#2. State a research question about what factors you believe will affect your outcome variable.
##What effect does gender and education have on health insurance coverage?
#3. Define at least 2 predictor variables, based on your research question. For this assignment, itβs best if these are categorical variables.
##I will use incomeg and educag and here is how I recoded the variables.
brfss_17$educ<-Recode(brfss_17$educa, recodes="1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
##Gender variable recode
brfss_17$gender<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))
# brfss_17$employ<-Recode(brfss_17$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
# brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')
#
# as.factor(brfss_17$employ)
#
# as.factor(brfss_17$educ)
#
# table(brfss_17$employ)
#4. Perform a descriptive analysis of the outcome variable by each of the variables you defined in part b. (e.g. 2 x 2 table, 2 x k table). Follow a similar approach to presenting your statistics as presented in Sparks 2009 (in the Google drive). This can be done easily using the tableone package!
library(tableone)
t1<-CreateTableOne(vars = c("educ", "gender"), strata = "hlthpln1", test = T, data = brfss_17)
#t1<-print(t1, format="p")
print(t1,format="p")
## Stratified by hlthpln1
## 0 1 p test
## n 1131 7466
## educ (%) <0.001
## 0hs 27.9 7.2
## 2hsgrad 33.0 20.7
## 3somecol 23.3 26.3
## 4colgrad 15.8 45.8
## gender = Male (%) 47.3 40.8 <0.001
print(t1)
## Stratified by hlthpln1
## 0 1 p test
## n 1131 7466
## educ (%) <0.001
## 0hs 314 (27.9) 535 ( 7.2)
## 2hsgrad 372 (33.0) 1539 (20.7)
## 3somecol 262 (23.3) 1956 (26.3)
## 4colgrad 178 (15.8) 3405 (45.8)
## gender = Male (%) 535 (47.3) 3049 (40.8) <0.001
#4.1 Calculate descriptive statistics (mean or percentages) for each variable using no weights or survey design, as well as with full survey design and weights.
#For educ variable
##Raw frequencies - No Weights
table(brfss_17$hlthpln1, brfss_17$educ)
##
## 0hs 2hsgrad 3somecol 4colgrad
## 0 314 372 262 178
## 1 535 1539 1956 3405
##Frequencies with Weights
teduc<-wtd.table(brfss_17$hlthpln1, brfss_17$educ, weights = brfss_17$mmsawt)
print(teduc)
## 0hs 2hsgrad 3somecol 4colgrad
## 0 1284062.4 1139395.3 881802.7 266394.6
## 1 1225410.5 2644849.0 3857401.4 3986029.0
##column percentages - No Weights
prop.table(table(brfss_17$hlthpln1, brfss_17$educ), margin=2)
##
## 0hs 2hsgrad 3somecol 4colgrad
## 0 0.36984688 0.19466248 0.11812444 0.04967904
## 1 0.63015312 0.80533752 0.88187556 0.95032096
##Percentages - Weights
prop.table(wtd.table(brfss_17$hlthpln1, brfss_17$educ, weights = brfss_17$mmsawt), margin=2)
## 0hs 2hsgrad 3somecol 4colgrad
## 0 0.51168609 0.30108926 0.18606557 0.06264536
## 1 0.48831391 0.69891074 0.81393443 0.93735464
##basic chi square test of independence
chisq.test(table(brfss_17$hlthpln1, brfss_17$educ))
##
## Pearson's Chi-squared test
##
## data: table(brfss_17$hlthpln1, brfss_17$educ)
## X-squared = 702.45, df = 3, p-value < 2.2e-16
#For gender variable
##Raw frequencies - No Weights
table(brfss_17$hlthpln1, brfss_17$gender)
##
## Female Male
## 0 596 535
## 1 4417 3049
##Frequencies - With Weights
tgen<-wtd.table(brfss_17$hlthpln1, brfss_17$gender, weights = brfss_17$mmsawt)
print(tgen)
## Female Male
## 0 1566721 2012799
## 1 6302505 5452694
##column percentages - No weights
prop.table(table(brfss_17$hlthpln1, brfss_17$gender), margin=2)
##
## Female Male
## 0 0.1188909 0.1492746
## 1 0.8811091 0.8507254
##Percentages - Weights
prop.table(wtd.table(brfss_17$chcocncr, brfss_17$sex, weights = brfss_17$mmsawt), margin=2)
## 1 2 9
## 1 0.0380162823 0.0633983186 0.0000000000
## 2 0.9603326609 0.9363069333 1.0000000000
## 7 0.0016510568 0.0001889718 0.0000000000
## 9 0.0000000000 0.0001057764 0.0000000000
##basic chi square test of independence
chisq.test(table(brfss_17$hlthpln1, brfss_17$gender))
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: table(brfss_17$hlthpln1, brfss_17$gender)
## X-squared = 16.622, df = 1, p-value = 4.563e-05
#4.2 Calculate percentages, or means, for each of your independent variables for each level of your outcome variable and present this in a table, with appropriate survey-corrected test statistics. (tableone package helps)
library(tableone)
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17)
options(survey.lonely.psu = "adjust")
st1<-svyCreateTableOne(vars = c("gender", "educ"), strata = "hlthpln1", test = T, data = des)
#st1<-print(st1, format="p")
print(st1, format="p")
## Stratified by hlthpln1
## 0 1 p test
## n 3579519.3 11755198.9
## gender = Male (%) 56.2 46.4 0.001
## educ (%) <0.001
## 0hs 36.0 10.5
## 2hsgrad 31.9 22.6
## 3somecol 24.7 32.9
## 4colgrad 7.5 34.0
#4.3 Are there substantive differences in the descriptive results between the analysis using survey design and that not using survey design?
##Yes, the numbers are slightly larger in the responses to the table in 4.2, which did not include a survey design. Once the survey design is included, the weights gave me the higher numbers. For example, in table 4, 47.3% of males do not have insurance, whereas 56.2% of males do not have insurance in the table with the survey design. In the table without survey design, 7.2% of people without a high school diploma in Texas have insurance compared to 10.5% in the table with the survey design. Those numbers also change for people with high school diploma, where 20.7% people have insurance in the original table versus 22.6% in the table with the survey design.