library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(questionr)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(tableone)

#The binary outcome variable is whether an individual smokes, recoded smok2 as smokers where whether they smoke everyday or someday they are still smokers

#the 2 predictor variables are insurance and education attainment

#question: does having less education have a impact on whether an individual smokes?

#Of the smoker are they insured?

load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss_17)
head(nams, n=10)
##  [1] "dispcode" "statere1" "safetime" "hhadult"  "genhlth"  "physhlth"
##  [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(brfss_17)<-newnames

#Poor or fair self rated health
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")

#sex
brfss_17$male<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))

#education level
brfss_17$educ<-Recode(brfss_17$educa, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')

#insurance
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")



#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")



#smoking
brfss_17$smokes<-Recode(brfss_17$smokday2, recodes="1:2=1;3=0; 7:9=0")

table(brfss_17$smokes, brfss_17$educ)
##    
##     2hsgrad 0Prim 1somehs 3somecol 4colgrad
##   0   15668  1114    2451    18299    24097
##   1   10574   763    2752     9579     5867
prop.table(table(brfss_17$smokes, brfss_17$educ), margin=2)
##    
##       2hsgrad     0Prim   1somehs  3somecol  4colgrad
##   0 0.5970582 0.5935003 0.4710744 0.6563957 0.8041984
##   1 0.4029418 0.4064997 0.5289256 0.3436043 0.1958016
chisq.test(table(brfss_17$smokes, brfss_17$educ))
## 
##  Pearson's Chi-squared test
## 
## data:  table(brfss_17$smokes, brfss_17$educ)
## X-squared = 4100, df = 4, p-value < 2.2e-16
brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1

brfss_17<-brfss_17%>%
  filter(tx==1, is.na(mmsawt)==F, is.na(smokes)==F)

#
options(survey.lonely.psu = "adjust")

des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17 )



table1<-CreateTableOne(vars = c("educ", "ins", "male" ), strata= "smokes", test = T, data = brfss_17)
print(table1, format="p")
##                  Stratified by smokes
##                   0           1           p      test
##   n               2126        1014                   
##   educ (%)                                <0.001     
##      2hsgrad      22.0        29.1                   
##      0Prim         3.4         4.4                   
##      1somehs       3.9        11.2                   
##      3somecol     28.3        35.1                   
##      4colgrad     42.5        20.3                   
##   ins (mean (SD)) 0.92 (0.28) 0.76 (0.43) <0.001     
##   male = Male (%) 51.2        46.6         0.018
surveyt1<-svyCreateTableOne(vars = c("educ", "ins", "male" ), strata = "smokes", test = T, data = des)

print(surveyt1, format="p")
##                  Stratified by smokes
##                   0                 1                 p      test
##   n                2863196.6         2136165.3                   
##   educ (%)                                            <0.001     
##      2hsgrad            23.8              31.9                   
##      0Prim               7.1               7.1                   
##      1somehs             3.7              16.4                   
##      3somecol           36.9              34.7                   
##      4colgrad           28.6              10.0                   
##   ins (mean (SD))       0.84 (0.37)       0.66 (0.47) <0.001     
##   male = Male (%)       60.1              52.8         0.068
#the biggest difference among the descriptive results is the college graduate variable 
#for smokers and non-smokers
#the non-weighted doubles the smokers category and nearly doubles the non-smokers