library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v stringr 1.4.0
## v tidyr 1.1.2 v forcats 0.5.1
## v readr 1.4.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
## x tidyr::unpack() masks Matrix::unpack()
load(url("https://github.com/coreysparks/data/blob/master/brfss_2017.Rdata?raw=true"))
#Using the Brfss_17 data wetry and use three variables to determine health; race, smoking, obesity to answer does race play a role in health disparities
nams<-names(brfss_17)
head(nams, n=10)
## [1] "dispcode" "statere1" "safetime" "hhadult" "genhlth" "physhlth"
## [7] "menthlth" "poorhlth" "hlthpln1" "persdoc2"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(brfss_17)<-newnames
#Poor or fair self rated health
brfss_17$badhealth<-Recode(brfss_17$genhlth,
recodes="4:5=1; 1:3=0; else=NA")
#race/ethnicity
brfss_17$black<-Recode(brfss_17$racegr3,
recodes="2=1; 9=NA; else=0")
brfss_17$white<-Recode(brfss_17$racegr3,
recodes="1=1; 9=NA; else=0")
brfss_17$other<-Recode(brfss_17$racegr3,
recodes="3:4=1; 9=NA; else=0")
brfss_17$hispanic<-Recode(brfss_17$racegr3,
recodes="5=1; 9=NA; else=0")
brfss_17$race_eth<-Recode(brfss_17$racegr3,
recodes="1='nhwhite'; 2='nh black'; 3='nh other';4='nh multirace'; 5='hispanic'; else=NA",
as.factor = T)
#BMI, in the brfss_17a the bmi variable has 2 implied decimal places, so we must divide by 100 to get real bmi's
brfss_17$bmi<-brfss_17$bmi5/100
brfss_17$obese<-ifelse(brfss_17$bmi>=30,
1,
0)
#smoking currently
brfss_17$smoke<-Recode(brfss_17$smoker3,
recodes="1:2 ='Current'; 3 ='Former';4 ='NeverSmoked'; else = NA",
as.factor=T)
brfss_17$smoke<-relevel(brfss_17$smoke,
ref = "NeverSmoked")
#smokes
brfss_17$smoking<-Recode(brfss_17$smokday2,
recodes= "1:2=1; 3=0; 7:9=NA")
sub<-brfss_17 %>%
select(badhealth,mmsaname, bmi, ,race_eth,white, black, hispanic, other, smoking, mmsawt, ststr) %>%
filter( complete.cases( . ))
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids= ~1,
strata= ~ststr,
weights= ~mmsawt
, data = sub )
#hispanic
mylogit <- glm(badhealth ~hispanic+ smoking + obese, data = brfss_17, family = "binomial")
summary(mylogit)
##
## Call:
## glm(formula = badhealth ~ hispanic + smoking + obese, family = "binomial",
## data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1524 -0.7462 -0.6036 -0.6036 1.8935
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.61048 0.01269 -126.95 <2e-16 ***
## hispanic 0.43011 0.03006 14.31 <2e-16 ***
## smoking 0.47414 0.01705 27.81 <2e-16 ***
## obese 0.64712 0.01705 37.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 92217 on 84854 degrees of freedom
## Residual deviance: 89882 on 84851 degrees of freedom
## (146020 observations deleted due to missingness)
## AIC: 89890
##
## Number of Fisher Scoring iterations: 4
confint(mylogit)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -1.6354064 -1.5856769
## hispanic 0.3710369 0.4888621
## smoking 0.4407173 0.5075432
## obese 0.6136969 0.6805262
exp(coef(mylogit))
## (Intercept) hispanic smoking obese
## 0.1997909 1.5374292 1.6066353 1.9100324
library(broom)
mylogit%>%
tidy()%>%
mutate(OR = exp(estimate),
LowerOR_Ci = exp(estimate - 1.96*std.error),
UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
knitr::kable(digits = 3)
| (Intercept) |
-1.610 |
0.013 |
-126.948 |
0 |
0.200 |
0.195 |
0.205 |
| hispanic |
0.430 |
0.030 |
14.310 |
0 |
1.537 |
1.449 |
1.631 |
| smoking |
0.474 |
0.017 |
27.813 |
0 |
1.607 |
1.554 |
1.661 |
| obese |
0.647 |
0.017 |
37.958 |
0 |
1.910 |
1.847 |
1.975 |
library(sjPlot)
## Registered S3 methods overwritten by 'lme4':
## method from
## cooks.distance.influence.merMod car
## influence.merMod car
## dfbeta.influence.merMod car
## dfbetas.influence.merMod car
plot_model(mylogit,
title = "odds for getting poor health")

newdata <- with(brfss_17, data.frame(hispanic=mean(hispanic,na.rm=TRUE ), smoking=mean(smoking, na.rm=TRUE), obese=mean(obese, na.rm=TRUE)))
predict(mylogit, newdata, type="response")
## 1
## 0.2275014
#black
mylogit1 <- glm(badhealth ~black+ smoking + obese, data = brfss_17, family = "binomial")
summary(mylogit1)
##
## Call:
## glm(formula = badhealth ~ black + smoking + obese, family = "binomial",
## data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1363 -0.7416 -0.6033 -0.6033 1.8939
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.61154 0.01267 -127.17 <2e-16 ***
## black 0.41559 0.02654 15.66 <2e-16 ***
## smoking 0.46121 0.01711 26.96 <2e-16 ***
## obese 0.63720 0.01708 37.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 92217 on 84854 degrees of freedom
## Residual deviance: 89842 on 84851 degrees of freedom
## (146020 observations deleted due to missingness)
## AIC: 89850
##
## Number of Fisher Scoring iterations: 4
confint(mylogit1)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -1.6364374 -1.5867625
## black 0.3634525 0.4675003
## smoking 0.4276694 0.4947277
## obese 0.6037168 0.6706559
exp(coef(mylogit1))
## (Intercept) black smoking obese
## 0.1995795 1.5152678 1.5859937 1.8911697
library(broom)
mylogit1%>%
tidy()%>%
mutate(OR = exp(estimate),
LowerOR_Ci = exp(estimate - 1.96*std.error),
UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
knitr::kable(digits = 3)
| (Intercept) |
-1.612 |
0.013 |
-127.171 |
0 |
0.200 |
0.195 |
0.205 |
| black |
0.416 |
0.027 |
15.658 |
0 |
1.515 |
1.438 |
1.596 |
| smoking |
0.461 |
0.017 |
26.961 |
0 |
1.586 |
1.534 |
1.640 |
| obese |
0.637 |
0.017 |
37.314 |
0 |
1.891 |
1.829 |
1.956 |
library(sjPlot)
plot_model(mylogit1,
title = "odds for getting poor health")

newdata1 <- with(brfss_17, data.frame(black=mean(black,na.rm=TRUE ), smoking=mean(smoking, na.rm=TRUE), obese=mean(obese, na.rm=TRUE)))
predict(mylogit1, newdata1, type="response")
## 1
## 0.2262858
#white
mylogit2 <- glm(badhealth ~white+ smoking + obese, data = brfss_17, family = "binomial")
summary(mylogit2)
##
## Call:
## glm(formula = badhealth ~ white + smoking + obese, family = "binomial",
## data = brfss_17)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1169 -0.7808 -0.5891 -0.5891 1.9167
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.21019 0.01986 -60.95 <2e-16 ***
## white -0.45323 0.01916 -23.66 <2e-16 ***
## smoking 0.43454 0.01720 25.26 <2e-16 ***
## obese 0.63159 0.01710 36.93 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 92217 on 84854 degrees of freedom
## Residual deviance: 89534 on 84851 degrees of freedom
## (146020 observations deleted due to missingness)
## AIC: 89542
##
## Number of Fisher Scoring iterations: 4
confint(mylogit2)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) -1.2491870 -1.1713547
## white -0.4907447 -0.4156417
## smoking 0.4008093 0.4682495
## obese 0.5980542 0.6650983
exp(coef(mylogit2))
## (Intercept) white smoking obese
## 0.2981392 0.6355696 1.5442573 1.8805899
library(broom)
mylogit2%>%
tidy()%>%
mutate(OR = exp(estimate),
LowerOR_Ci = exp(estimate - 1.96*std.error),
UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
knitr::kable(digits = 3)
| (Intercept) |
-1.210 |
0.020 |
-60.951 |
0 |
0.298 |
0.287 |
0.310 |
| white |
-0.453 |
0.019 |
-23.656 |
0 |
0.636 |
0.612 |
0.660 |
| smoking |
0.435 |
0.017 |
25.258 |
0 |
1.544 |
1.493 |
1.597 |
| obese |
0.632 |
0.017 |
36.928 |
0 |
1.881 |
1.819 |
1.945 |
library(sjPlot)
plot_model(mylogit2,
title = "odds for getting poor health")

newdata2 <- with(brfss_17, data.frame(white=mean(white,na.rm=TRUE ), smoking=mean(smoking, na.rm=TRUE), obese=mean(obese, na.rm=TRUE)))
predict(mylogit2, newdata2, type="response")
## 1
## 0.2285688
#results: The probability seems to be around 0.22 for each race, the issue of overfitting may have caused this similarity amongst each race