library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(questionr)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(tableone)
#The binary outcome variable is whether an individual smokes, recoded smok2 as smokers where whether they smoke everyday or someday they are still smokers
#the 2 predictor variables are insurance and education attainment
#question: does having less education have a impact on whether an individual smokes?
#Of the smoker are they insured?
load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
#The names in the data are very ugly, so I make them less ugly
nams<-names(brfss_17)
head(nams, n=10)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth" "physhlth"
## [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(brfss_17)<-newnames
#Poor or fair self rated health
brfss_17$badhealth<-Recode(brfss_17$genhlth, recodes="4:5=1; 1:3=0; else=NA")
#sex
brfss_17$male<-as.factor(ifelse(brfss_17$sex==1, "Male", "Female"))
#education level
brfss_17$educ<-Recode(brfss_17$educa, recodes="1:2='0Prim'; 3='1somehs'; 4='2hsgrad'; 5='3somecol'; 6='4colgrad';9=NA", as.factor=T)
brfss_17$educ<-relevel(brfss_17$educ, ref='2hsgrad')
#insurance
brfss_17$ins<-Recode(brfss_17$hlthpln1, recodes ="7:9=NA; 1=1;2=0")
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3, recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3, recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3, recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3, recodes="5=1; 9=NA; else=0")
#smoking
brfss_17$smokes<-Recode(brfss_17$smokday2, recodes="1:2=1;3=0; 7:9=0")
table(brfss_17$smokes, brfss_17$educ)
##
## 2hsgrad 0Prim 1somehs 3somecol 4colgrad
## 0 15668 1114 2451 18299 24097
## 1 10574 763 2752 9579 5867
prop.table(table(brfss_17$smokes, brfss_17$educ), margin=2)
##
## 2hsgrad 0Prim 1somehs 3somecol 4colgrad
## 0 0.5970582 0.5935003 0.4710744 0.6563957 0.8041984
## 1 0.4029418 0.4064997 0.5289256 0.3436043 0.1958016
chisq.test(table(brfss_17$smokes, brfss_17$educ))
##
## Pearson's Chi-squared test
##
## data: table(brfss_17$smokes, brfss_17$educ)
## X-squared = 4100, df = 4, p-value < 2.2e-16
brfss_17$tx<-NA
brfss_17$tx[grep(pattern = "TX", brfss_17$mmsaname)]<-1
brfss_17<-brfss_17%>%
filter(tx==1, is.na(mmsawt)==F, is.na(smokes)==F)
#
options(survey.lonely.psu = "adjust")
des<-svydesign(ids=~1, strata=~ststr, weights=~mmsawt, data = brfss_17 )
table1<-CreateTableOne(vars = c("educ", "ins", "male" ), strata= "smokes", test = T, data = brfss_17)
print(table1, format="p")
## Stratified by smokes
## 0 1 p test
## n 2126 1014
## educ (%) <0.001
## 2hsgrad 22.0 29.1
## 0Prim 3.4 4.4
## 1somehs 3.9 11.2
## 3somecol 28.3 35.1
## 4colgrad 42.5 20.3
## ins (mean (SD)) 0.92 (0.28) 0.76 (0.43) <0.001
## male = Male (%) 51.2 46.6 0.018
surveyt1<-svyCreateTableOne(vars = c("educ", "ins", "male" ), strata = "smokes", test = T, data = des)
print(surveyt1, format="p")
## Stratified by smokes
## 0 1 p test
## n 2863196.6 2136165.3
## educ (%) <0.001
## 2hsgrad 23.8 31.9
## 0Prim 7.1 7.1
## 1somehs 3.7 16.4
## 3somecol 36.9 34.7
## 4colgrad 28.6 10.0
## ins (mean (SD)) 0.84 (0.37) 0.66 (0.47) <0.001
## male = Male (%) 60.1 52.8 0.068
#the biggest difference among the descriptive results is the college graduate variable
#for smokers and non-smokers
#the non-weighted doubles the smokers category and nearly doubles the non-smokers