load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
nams<-names(brfss_17)
head(nams, n=10)
##  [1] "dispcode" "statere1" "safetime" "hhadult"  "genhlth"  "physhlth"
##  [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(brfss_17)<-newnames

##Get TX Cities 
brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1

##Remove Non-Responses 
brfss_17<-brfss_17%>%
  filter(tx==1, is.na(mmsawt)==F, is.na(hlthpln1)==F)
# 1. Define a binary outcome variable of your choosing and define how you recode the original variable.
##I will use the variable hlthpln1 as my binary variable. The question is: Do you have any kind of health care coverage, including health insurance, prepaid plans such as HMOs, or government plans such as Medicare,or Indian Health Service?

#I used your code to recode the original variable. 
brfss_17$hlthpln1<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")

View(brfss_17)
#2. State a research question about what factors you believe will affect your outcome variable.
##What effect does gender and education have on health insurance coverage?
#3. Define at least 2 predictor variables, based on your research question. For this assignment, it’s best if these are categorical variables.

##I will use incomeg and educag and here is how I recoded the variables.
brfss_17$educ<-Recode(brfss_17$educa, recodes="1:3='0hs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)

##Gender variable recode
brfss_17$gender<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))


# brfss_17$employ<-Recode(brfss_17$employ, recodes="1:2='Employed'; 2:6='nilf'; 7='retired'; 8='unable'; else=NA", as.factor=T)
# brfss_17$employ<-relevel(brfss_17$employ, ref='Employed')
# 
# as.factor(brfss_17$employ)
# 
# as.factor(brfss_17$educ)
# 
# table(brfss_17$employ)
#4. Perform a descriptive analysis of the outcome variable by each of the variables you defined in part b. (e.g. 2 x 2 table, 2 x k table). Follow a similar approach to presenting your statistics as presented in Sparks 2009 (in the Google drive). This can be done easily using the tableone package!

library(tableone)
t1<-CreateTableOne(vars = c("educ", "gender"), strata = "hlthpln1", test = T, data = brfss_17)
#t1<-print(t1, format="p")
print(t1,format="p")
##                    Stratified by hlthpln1
##                     0    1    p      test
##   n                 1131 7466            
##   educ (%)                    <0.001     
##      0hs            27.9  7.2            
##      2hsgrad        33.0 20.7            
##      3somecol       23.3 26.3            
##      4colgrad       15.8 45.8            
##   gender = Male (%) 47.3 40.8 <0.001
print(t1)
##                    Stratified by hlthpln1
##                     0            1            p      test
##   n                 1131         7466                    
##   educ (%)                                    <0.001     
##      0hs             314 (27.9)   535 ( 7.2)             
##      2hsgrad         372 (33.0)  1539 (20.7)             
##      3somecol        262 (23.3)  1956 (26.3)             
##      4colgrad        178 (15.8)  3405 (45.8)             
##   gender = Male (%)  535 (47.3)  3049 (40.8)  <0.001
#4.1 Calculate descriptive statistics (mean or percentages) for each variable using no weights or survey design, as well as with full survey design and weights.

#For educ variable 
##Raw frequencies - No Weights
table(brfss_17$hlthpln1, brfss_17$educ)
##    
##      0hs 2hsgrad 3somecol 4colgrad
##   0  314     372      262      178
##   1  535    1539     1956     3405
##Frequencies with Weights
teduc<-wtd.table(brfss_17$hlthpln1, brfss_17$educ, weights = brfss_17$mmsawt)
print(teduc)
##         0hs   2hsgrad  3somecol  4colgrad
## 0 1284062.4 1139395.3  881802.7  266394.6
## 1 1225410.5 2644849.0 3857401.4 3986029.0
##column percentages - No Weights
prop.table(table(brfss_17$hlthpln1, brfss_17$educ), margin=2)
##    
##            0hs    2hsgrad   3somecol   4colgrad
##   0 0.36984688 0.19466248 0.11812444 0.04967904
##   1 0.63015312 0.80533752 0.88187556 0.95032096
##Percentages - Weights
prop.table(wtd.table(brfss_17$hlthpln1, brfss_17$educ, weights = brfss_17$mmsawt), margin=2)
##          0hs    2hsgrad   3somecol   4colgrad
## 0 0.51168609 0.30108926 0.18606557 0.06264536
## 1 0.48831391 0.69891074 0.81393443 0.93735464
##basic chi square test of independence
chisq.test(table(brfss_17$hlthpln1, brfss_17$educ))
## 
##  Pearson's Chi-squared test
## 
## data:  table(brfss_17$hlthpln1, brfss_17$educ)
## X-squared = 702.45, df = 3, p-value < 2.2e-16
#For gender variable 
##Raw frequencies - No Weights
table(brfss_17$hlthpln1, brfss_17$gender)
##    
##     Female Male
##   0    596  535
##   1   4417 3049
##Frequencies - With Weights 
tgen<-wtd.table(brfss_17$hlthpln1, brfss_17$gender, weights = brfss_17$mmsawt)
print(tgen)
##    Female    Male
## 0 1566721 2012799
## 1 6302505 5452694
##column percentages - No weights
prop.table(table(brfss_17$hlthpln1, brfss_17$gender), margin=2)
##    
##        Female      Male
##   0 0.1188909 0.1492746
##   1 0.8811091 0.8507254
##Percentages - Weights
prop.table(wtd.table(brfss_17$chcocncr, brfss_17$sex, weights = brfss_17$mmsawt), margin=2)
##              1            2            9
## 1 0.0380162823 0.0633983186 0.0000000000
## 2 0.9603326609 0.9363069333 1.0000000000
## 7 0.0016510568 0.0001889718 0.0000000000
## 9 0.0000000000 0.0001057764 0.0000000000
##basic chi square test of independence
chisq.test(table(brfss_17$hlthpln1, brfss_17$gender))
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  table(brfss_17$hlthpln1, brfss_17$gender)
## X-squared = 16.622, df = 1, p-value = 4.563e-05
#4.2 Calculate percentages, or means, for each of your independent variables for each level of your outcome variable and present this in a table, with appropriate survey-corrected test statistics. (tableone package helps)

library(tableone)
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17)
options(survey.lonely.psu = "adjust")
st1<-svyCreateTableOne(vars = c("gender", "educ"), strata = "hlthpln1", test = T, data = des)
#st1<-print(st1, format="p")
print(st1, format="p")
##                    Stratified by hlthpln1
##                     0         1          p      test
##   n                 3579519.3 11755198.9            
##   gender = Male (%)      56.2       46.4  0.001     
##   educ (%)                               <0.001     
##      0hs                 36.0       10.5            
##      2hsgrad             31.9       22.6            
##      3somecol            24.7       32.9            
##      4colgrad             7.5       34.0
#4.3 Are there substantive differences in the descriptive results between the analysis using survey design and that not using survey design?
##Yes, the numbers are slightly larger in the responses to the table in 4.2, which did not include a survey design. Once the survey design is included, the weights gave me the higher numbers. For example, in table 4, 47.3% of males do not have insurance, whereas 56.2% of males do not have insurance in the table with the survey design. In the table without survey design, 7.2% of people without a high school diploma in Texas have insurance compared to 10.5% in the table with the survey design. Those numbers also change for people with high school diploma, where 20.7% people have insurance in the original table versus 22.6% in the table with the survey design.