library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.1     v forcats 0.5.0
## v readr   1.3.1
## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()
## x tidyr::unpack() masks Matrix::unpack()

Research Question:

Is increased educational attainment associated with reduced odds of poverty? Also, is this relationship influenced by race/ethnicity?

Load data

library(foreign)
nhanes_18 <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/DEMO_J.XPT", 
    NULL)
nams<-names(nhanes_18)
head(nams, n=10)
##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM"
newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(nhanes_18)<-newnames

Recode variables

#sex
nhanes_18$sex<-as.factor(ifelse(nhanes_18$riagendr==1,
                                "Male",
                                "Female"))
#race/ethnicity
nhanes_18$hispanic <- Recode(nhanes_18$ridreth3, recodes = "1=1; 2=1; else=0")
nhanes_18$black<-Recode(nhanes_18$ridreth3, recodes="4=1; else=0")
nhanes_18$white<-Recode(nhanes_18$ridreth3, recodes="3=1; else=0")
nhanes_18$asian<-Recode(nhanes_18$ridreth3, recodes="6=1; else=0")
nhanes_18$other<-Recode(nhanes_18$ridreth3, recodes="7=1; else=0")

# at or below poverty threshold using income to poverty ratio
nhanes_18$pov<-ifelse(nhanes_18$indfmpir <= 1, "at or below poverty", 
                      ifelse(nhanes_18$indfmpir > 1, "above poverty", NA))

# education
nhanes_18$educ <- Recode(nhanes_18$dmdeduc2, recodes="1:2='1 lths'; 3='2 hs'; 4='3 some col'; 5='4 col grad'; 7:9=NA; else=NA", as.factor=T)

# marital status
nhanes_18$marst <- Recode(nhanes_18$dmdmartl, recodes="1='1 married'; 2='2 widowed'; 3='3 divorced'; 4='4 separated'; 5='5 nm'; 6='6 cohab'; else=NA", as.factor=T)

# age
nhanes_18$agec<-cut(nhanes_18$ridageyr,
                   breaks=c(0,17,65,80))
nhanes_18$pov2<- Recode(nhanes_18$pov, recodes= "'at or below poverty'=1; 'above poverty'=0", as.factor=T)
nhanes_18$race_eth<- Recode(nhanes_18$ridreth3, recodes= "3= '1 white'; 1:2='2 hispanic'; 4= '3 black'; 6= '4 asian'; 7= '5 other' ", as.factor=T )

Analysis

sub<-nhanes_18 %>%
  select(pov, pov2, agec,sex, marst, race_eth, educ, white, black, hispanic, asian, other, wtint2yr, sdmvpsu, sdmvstra) %>%
  filter( complete.cases( . ))

#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids= ~sdmvpsu,
               strata= ~sdmvstra,
               weights= ~wtint2yr
               , data = sub,
               nest = TRUE)

Chi-square test of independence

svychisq(~pov2+educ,
         design = des)
## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~pov2 + educ, design = des)
## F = 50.189, ndf = 2.1222, ddf = 31.8328, p-value = 8.43e-11

Plotting estimates with standard errors

cat<-svyby(formula = ~pov2,
           by = ~educ,
           design = des,
           FUN = svymean,
           na.rm=T)

# not sure why this code is not working
#cat%>%
  #ggplot()+
  #geom_point(aes(x=educ,y=pov2))+
  #geom_errorbar(aes(x=educ, ymin = pov2-1.96*se, 
                    #ymax= pov2+1.96*se),
                #width=.25)+
   #labs(title = "Percent % of US Adults Below Poverty Line by Education", 
        #caption = "Source: CDC BRFSS - NHANES Data 2017-18, Calculations by Daniel Mamani, M.S",
       #x = "Education",
       #y = "% Poverty")+
  #theme_minimal()

Logistic Regression model

#Logit model
fit.logit<-svyglm(pov2 ~ agec + race_eth + educ + marst ,
                  design = des,
                  family = binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
library(broom)
fit.logit%>%
  tidy()%>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value
(Intercept) -1.446 0.209 -6.907 0.020
agec(65,80] -0.647 0.144 -4.502 0.046
race_eth2 hispanic 0.476 0.162 2.937 0.099
race_eth3 black 0.651 0.175 3.709 0.066
race_eth4 asian 0.563 0.265 2.121 0.168
race_eth5 other 0.626 0.198 3.156 0.087
educ2 hs -0.687 0.150 -4.585 0.044
educ3 some col -1.350 0.205 -6.584 0.022
educ4 col grad -2.677 0.219 -12.202 0.007
marst2 widowed 0.676 0.232 2.918 0.100
marst3 divorced 0.963 0.130 7.437 0.018
marst4 separated 1.410 0.303 4.659 0.043
marst5 nm 0.905 0.142 6.387 0.024
marst6 cohab 0.903 0.220 4.098 0.055

Add OR’s and CI’s

fit.logit%>%
  tidy()%>%
  mutate(OR = exp(estimate),
         LowerOR_Ci = exp(estimate - 1.96*std.error),
         UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
  knitr::kable(digits = 3)
term estimate std.error statistic p.value OR LowerOR_Ci UpperOR_Ci
(Intercept) -1.446 0.209 -6.907 0.020 0.236 0.156 0.355
agec(65,80] -0.647 0.144 -4.502 0.046 0.523 0.395 0.694
race_eth2 hispanic 0.476 0.162 2.937 0.099 1.610 1.172 2.212
race_eth3 black 0.651 0.175 3.709 0.066 1.917 1.359 2.703
race_eth4 asian 0.563 0.265 2.121 0.168 1.755 1.044 2.952
race_eth5 other 0.626 0.198 3.156 0.087 1.869 1.268 2.757
educ2 hs -0.687 0.150 -4.585 0.044 0.503 0.375 0.675
educ3 some col -1.350 0.205 -6.584 0.022 0.259 0.173 0.387
educ4 col grad -2.677 0.219 -12.202 0.007 0.069 0.045 0.106
marst2 widowed 0.676 0.232 2.918 0.100 1.966 1.249 3.097
marst3 divorced 0.963 0.130 7.437 0.018 2.621 2.033 3.378
marst4 separated 1.410 0.303 4.659 0.043 4.095 2.263 7.410
marst5 nm 0.905 0.142 6.387 0.024 2.472 1.872 3.263
marst6 cohab 0.903 0.220 4.098 0.055 2.467 1.602 3.800

Predicted probabilities for interesting cases

#get a series of predicted probabilites for different "types" of people for each model
#ref_grid will generate all possible combinations of predictors from a model

library(emmeans)
rg<-ref_grid(fit.logit, data=nhanes_18)

marg_logit<-emmeans(object = rg,
              specs = c( "race_eth",  "educ"),
              type="response" )

knitr::kable(marg_logit,  digits = 4)
race_eth educ prob SE df asymp.LCL asymp.UCL
1 white 1 lths 0.2769 0.0289 Inf 0.2240 0.3370
2 hispanic 1 lths 0.3814 0.0360 Inf 0.3137 0.4541
3 black 1 lths 0.4233 0.0246 Inf 0.3760 0.4721
4 asian 1 lths 0.4020 0.0518 Inf 0.3059 0.5063
5 other 1 lths 0.4173 0.0487 Inf 0.3259 0.5147
1 white 2 hs 0.1615 0.0196 Inf 0.1267 0.2036
2 hispanic 2 hs 0.2367 0.0364 Inf 0.1728 0.3152
3 black 2 hs 0.2696 0.0378 Inf 0.2022 0.3496
4 asian 2 hs 0.2527 0.0400 Inf 0.1824 0.3387
5 other 2 hs 0.2647 0.0398 Inf 0.1943 0.3496
1 white 3 some col 0.0903 0.0120 Inf 0.0694 0.1166
2 hispanic 3 some col 0.1378 0.0240 Inf 0.0971 0.1918
3 black 3 some col 0.1598 0.0272 Inf 0.1134 0.2205
4 asian 3 some col 0.1484 0.0363 Inf 0.0903 0.2341
5 other 3 some col 0.1565 0.0336 Inf 0.1013 0.2340
1 white 4 col grad 0.0257 0.0051 Inf 0.0174 0.0378
2 hispanic 4 col grad 0.0407 0.0068 Inf 0.0292 0.0564
3 black 4 col grad 0.0481 0.0113 Inf 0.0302 0.0756
4 asian 4 col grad 0.0442 0.0129 Inf 0.0248 0.0775
5 other 4 col grad 0.0469 0.0103 Inf 0.0304 0.0718
comps<-as.data.frame(marg_logit)

comps[comps$race_eth=="1 white" & comps$educ == "4 col grad" , ]
##    race_eth       educ       prob          SE  df  asymp.LCL  asymp.UCL
## 16  1 white 4 col grad 0.02567053 0.005101928 Inf 0.01735777 0.03781114
comps[comps$race_eth=="3 black" & comps$educ == "4 col grad" , ]
##    race_eth       educ       prob         SE  df  asymp.LCL  asymp.UCL
## 18  3 black 4 col grad 0.04806878 0.01126543 Inf 0.03022505 0.07562529
comps[comps$race_eth=="1 white" & comps$educ == "1 lths" , ]
##   race_eth   educ      prob         SE  df asymp.LCL asymp.UCL
## 1  1 white 1 lths 0.2769462 0.02891267 Inf 0.2239751 0.3370048
comps[comps$race_eth=="3 black" & comps$educ == "1 lths" , ]
##   race_eth   educ     prob         SE  df asymp.LCL asymp.UCL
## 3  3 black 1 lths 0.423331 0.02459793 Inf 0.3759905 0.4721223

Conclusion

# The logistic regression model provides evidence of a negative relationship between educational attaiment and the odds of being in poerty. The predicted probabilities suggest that race influences this relationship given that Black individuals are roughly twice as likely to live below the poverty line than whites despite having the same education levels of education.