library(car)
## Loading required package: carData
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
library(questionr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.3 v stringr 1.4.0
## v tidyr 1.1.1 v forcats 0.5.0
## v readr 1.3.1
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some() masks car::some()
## x tidyr::unpack() masks Matrix::unpack()
Is increased educational attainment associated with reduced odds of poverty? Also, is this relationship influenced by race/ethnicity?
library(foreign)
nhanes_18 <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/DEMO_J.XPT",
NULL)
nams<-names(nhanes_18)
head(nams, n=10)
## [1] "SEQN" "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
## [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM"
newnames<-tolower(gsub(pattern = "_",replacement = "",x = nams))
names(nhanes_18)<-newnames
#sex
nhanes_18$sex<-as.factor(ifelse(nhanes_18$riagendr==1,
"Male",
"Female"))
#race/ethnicity
nhanes_18$hispanic <- Recode(nhanes_18$ridreth3, recodes = "1=1; 2=1; else=0")
nhanes_18$black<-Recode(nhanes_18$ridreth3, recodes="4=1; else=0")
nhanes_18$white<-Recode(nhanes_18$ridreth3, recodes="3=1; else=0")
nhanes_18$asian<-Recode(nhanes_18$ridreth3, recodes="6=1; else=0")
nhanes_18$other<-Recode(nhanes_18$ridreth3, recodes="7=1; else=0")
# at or below poverty threshold using income to poverty ratio
nhanes_18$pov<-ifelse(nhanes_18$indfmpir <= 1, "at or below poverty",
ifelse(nhanes_18$indfmpir > 1, "above poverty", NA))
# education
nhanes_18$educ <- Recode(nhanes_18$dmdeduc2, recodes="1:2='1 lths'; 3='2 hs'; 4='3 some col'; 5='4 col grad'; 7:9=NA; else=NA", as.factor=T)
# marital status
nhanes_18$marst <- Recode(nhanes_18$dmdmartl, recodes="1='1 married'; 2='2 widowed'; 3='3 divorced'; 4='4 separated'; 5='5 nm'; 6='6 cohab'; else=NA", as.factor=T)
# age
nhanes_18$agec<-cut(nhanes_18$ridageyr,
breaks=c(0,17,65,80))
nhanes_18$pov2<- Recode(nhanes_18$pov, recodes= "'at or below poverty'=1; 'above poverty'=0", as.factor=T)
nhanes_18$race_eth<- Recode(nhanes_18$ridreth3, recodes= "3= '1 white'; 1:2='2 hispanic'; 4= '3 black'; 6= '4 asian'; 7= '5 other' ", as.factor=T )
sub<-nhanes_18 %>%
select(pov, pov2, agec,sex, marst, race_eth, educ, white, black, hispanic, asian, other, wtint2yr, sdmvpsu, sdmvstra) %>%
filter( complete.cases( . ))
#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids= ~sdmvpsu,
strata= ~sdmvstra,
weights= ~wtint2yr
, data = sub,
nest = TRUE)
Chi-square test of independence
svychisq(~pov2+educ,
design = des)
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: svychisq(~pov2 + educ, design = des)
## F = 50.189, ndf = 2.1222, ddf = 31.8328, p-value = 8.43e-11
cat<-svyby(formula = ~pov2,
by = ~educ,
design = des,
FUN = svymean,
na.rm=T)
# not sure why this code is not working
#cat%>%
#ggplot()+
#geom_point(aes(x=educ,y=pov2))+
#geom_errorbar(aes(x=educ, ymin = pov2-1.96*se,
#ymax= pov2+1.96*se),
#width=.25)+
#labs(title = "Percent % of US Adults Below Poverty Line by Education",
#caption = "Source: CDC BRFSS - NHANES Data 2017-18, Calculations by Daniel Mamani, M.S",
#x = "Education",
#y = "% Poverty")+
#theme_minimal()
#Logit model
fit.logit<-svyglm(pov2 ~ agec + race_eth + educ + marst ,
design = des,
family = binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
library(broom)
fit.logit%>%
tidy()%>%
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| (Intercept) | -1.446 | 0.209 | -6.907 | 0.020 |
| agec(65,80] | -0.647 | 0.144 | -4.502 | 0.046 |
| race_eth2 hispanic | 0.476 | 0.162 | 2.937 | 0.099 |
| race_eth3 black | 0.651 | 0.175 | 3.709 | 0.066 |
| race_eth4 asian | 0.563 | 0.265 | 2.121 | 0.168 |
| race_eth5 other | 0.626 | 0.198 | 3.156 | 0.087 |
| educ2 hs | -0.687 | 0.150 | -4.585 | 0.044 |
| educ3 some col | -1.350 | 0.205 | -6.584 | 0.022 |
| educ4 col grad | -2.677 | 0.219 | -12.202 | 0.007 |
| marst2 widowed | 0.676 | 0.232 | 2.918 | 0.100 |
| marst3 divorced | 0.963 | 0.130 | 7.437 | 0.018 |
| marst4 separated | 1.410 | 0.303 | 4.659 | 0.043 |
| marst5 nm | 0.905 | 0.142 | 6.387 | 0.024 |
| marst6 cohab | 0.903 | 0.220 | 4.098 | 0.055 |
Add OR’s and CI’s
fit.logit%>%
tidy()%>%
mutate(OR = exp(estimate),
LowerOR_Ci = exp(estimate - 1.96*std.error),
UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
knitr::kable(digits = 3)
| term | estimate | std.error | statistic | p.value | OR | LowerOR_Ci | UpperOR_Ci |
|---|---|---|---|---|---|---|---|
| (Intercept) | -1.446 | 0.209 | -6.907 | 0.020 | 0.236 | 0.156 | 0.355 |
| agec(65,80] | -0.647 | 0.144 | -4.502 | 0.046 | 0.523 | 0.395 | 0.694 |
| race_eth2 hispanic | 0.476 | 0.162 | 2.937 | 0.099 | 1.610 | 1.172 | 2.212 |
| race_eth3 black | 0.651 | 0.175 | 3.709 | 0.066 | 1.917 | 1.359 | 2.703 |
| race_eth4 asian | 0.563 | 0.265 | 2.121 | 0.168 | 1.755 | 1.044 | 2.952 |
| race_eth5 other | 0.626 | 0.198 | 3.156 | 0.087 | 1.869 | 1.268 | 2.757 |
| educ2 hs | -0.687 | 0.150 | -4.585 | 0.044 | 0.503 | 0.375 | 0.675 |
| educ3 some col | -1.350 | 0.205 | -6.584 | 0.022 | 0.259 | 0.173 | 0.387 |
| educ4 col grad | -2.677 | 0.219 | -12.202 | 0.007 | 0.069 | 0.045 | 0.106 |
| marst2 widowed | 0.676 | 0.232 | 2.918 | 0.100 | 1.966 | 1.249 | 3.097 |
| marst3 divorced | 0.963 | 0.130 | 7.437 | 0.018 | 2.621 | 2.033 | 3.378 |
| marst4 separated | 1.410 | 0.303 | 4.659 | 0.043 | 4.095 | 2.263 | 7.410 |
| marst5 nm | 0.905 | 0.142 | 6.387 | 0.024 | 2.472 | 1.872 | 3.263 |
| marst6 cohab | 0.903 | 0.220 | 4.098 | 0.055 | 2.467 | 1.602 | 3.800 |
#get a series of predicted probabilites for different "types" of people for each model
#ref_grid will generate all possible combinations of predictors from a model
library(emmeans)
rg<-ref_grid(fit.logit, data=nhanes_18)
marg_logit<-emmeans(object = rg,
specs = c( "race_eth", "educ"),
type="response" )
knitr::kable(marg_logit, digits = 4)
| race_eth | educ | prob | SE | df | asymp.LCL | asymp.UCL |
|---|---|---|---|---|---|---|
| 1 white | 1 lths | 0.2769 | 0.0289 | Inf | 0.2240 | 0.3370 |
| 2 hispanic | 1 lths | 0.3814 | 0.0360 | Inf | 0.3137 | 0.4541 |
| 3 black | 1 lths | 0.4233 | 0.0246 | Inf | 0.3760 | 0.4721 |
| 4 asian | 1 lths | 0.4020 | 0.0518 | Inf | 0.3059 | 0.5063 |
| 5 other | 1 lths | 0.4173 | 0.0487 | Inf | 0.3259 | 0.5147 |
| 1 white | 2 hs | 0.1615 | 0.0196 | Inf | 0.1267 | 0.2036 |
| 2 hispanic | 2 hs | 0.2367 | 0.0364 | Inf | 0.1728 | 0.3152 |
| 3 black | 2 hs | 0.2696 | 0.0378 | Inf | 0.2022 | 0.3496 |
| 4 asian | 2 hs | 0.2527 | 0.0400 | Inf | 0.1824 | 0.3387 |
| 5 other | 2 hs | 0.2647 | 0.0398 | Inf | 0.1943 | 0.3496 |
| 1 white | 3 some col | 0.0903 | 0.0120 | Inf | 0.0694 | 0.1166 |
| 2 hispanic | 3 some col | 0.1378 | 0.0240 | Inf | 0.0971 | 0.1918 |
| 3 black | 3 some col | 0.1598 | 0.0272 | Inf | 0.1134 | 0.2205 |
| 4 asian | 3 some col | 0.1484 | 0.0363 | Inf | 0.0903 | 0.2341 |
| 5 other | 3 some col | 0.1565 | 0.0336 | Inf | 0.1013 | 0.2340 |
| 1 white | 4 col grad | 0.0257 | 0.0051 | Inf | 0.0174 | 0.0378 |
| 2 hispanic | 4 col grad | 0.0407 | 0.0068 | Inf | 0.0292 | 0.0564 |
| 3 black | 4 col grad | 0.0481 | 0.0113 | Inf | 0.0302 | 0.0756 |
| 4 asian | 4 col grad | 0.0442 | 0.0129 | Inf | 0.0248 | 0.0775 |
| 5 other | 4 col grad | 0.0469 | 0.0103 | Inf | 0.0304 | 0.0718 |
comps<-as.data.frame(marg_logit)
comps[comps$race_eth=="1 white" & comps$educ == "4 col grad" , ]
## race_eth educ prob SE df asymp.LCL asymp.UCL
## 16 1 white 4 col grad 0.02567053 0.005101928 Inf 0.01735777 0.03781114
comps[comps$race_eth=="3 black" & comps$educ == "4 col grad" , ]
## race_eth educ prob SE df asymp.LCL asymp.UCL
## 18 3 black 4 col grad 0.04806878 0.01126543 Inf 0.03022505 0.07562529
comps[comps$race_eth=="1 white" & comps$educ == "1 lths" , ]
## race_eth educ prob SE df asymp.LCL asymp.UCL
## 1 1 white 1 lths 0.2769462 0.02891267 Inf 0.2239751 0.3370048
comps[comps$race_eth=="3 black" & comps$educ == "1 lths" , ]
## race_eth educ prob SE df asymp.LCL asymp.UCL
## 3 3 black 1 lths 0.423331 0.02459793 Inf 0.3759905 0.4721223
# The logistic regression model provides evidence of a negative relationship between educational attaiment and the odds of being in poerty. The predicted probabilities suggest that race influences this relationship given that Black individuals are roughly twice as likely to live below the poverty line than whites despite having the same education levels of education.