Using ECLS-K data, I will assess how parent education, family income, and parent race/ethnicity are associated with poverty status.

#load required packages
library(car)
## Loading required package: carData
library(stargazer)
## 
## Please cite as:
##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## Loading required package: survival
## 
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
## 
##     dotchart
library(questionr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tableone)

# Load the ECLS-K kindergarten-through-fifth-grade file; this creates the
# object `eclskk5` in the workspace.
load("~/Google Drive/class_20_7283/data/eclsk_k5.Rdata")

# Columns kept for this assignment: child id, child and parent
# characteristics, and the wave-4 stratum, PSU and weight variables.
mydat <- c(
  "childid", "x_chsex_r", "x1locale", "x_raceth_r",
  "x4povty_i", "x4par1ed_i", "x4inccat_i", "x4par1rac",
  "x4height", "w4pf4str", "w4pf4psu", "w4pf40"
)

# Keep only those columns as a plain data frame ...
eclsk <- data.frame(eclskk5[, mydat])

# ... and drop the full file to free memory.
rm(eclskk5)

Recode Variables

# Recode the raw ECLS-K items into analysis variables. In every recode the
# code -9 is set to NA; codes not listed in a recode string are left as-is.
# `as.factor` is spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.

# parent 1 education: five ordered-by-label categories, with high-school
# graduates as the reference group
eclsk$educ <- Recode(
  eclsk$x4par1ed_i,
  recodes = "1='0Prim'; 2='1somehs'; 3='2hsgrad'; 4:5='3somecol'; 6:9='4colgrad'; -9=NA",
  as.factor = TRUE
)
eclsk$educ <- relevel(eclsk$educ, ref = "2hsgrad")

# family income: collapse the 18 income codes into five bands, with the
# highest band (>50k) as the reference group
eclsk$inc <- Recode(
  eclsk$x4inccat_i,
  recodes = "1:3='<15,001'; 4:5='15,001-25k'; 6:7='25,001-35k'; 8:10='35,001-50k'; 11:18='>50k'; -9=NA",
  as.factor = TRUE
)
eclsk$inc <- relevel(eclsk$inc, ref = ">50k")

# race/ethnicity of parent 1, with NH white as the reference group
eclsk$raceth <- Recode(
  eclsk$x4par1rac,
  recodes = "1='nh white'; 2='nh black'; 3:4='hispanic'; 5:7='nh other'; 8='nh multirace'; -9=NA",
  as.factor = TRUE
)
eclsk$raceth <- relevel(eclsk$raceth, ref = "nh white")

# poverty: numeric 0/1 outcome (code 1 -> 1, codes 2 and 3 -> 0)
# NOTE(review): presumably code 1 means "below the poverty threshold" --
# confirm against the ECLS-K codebook.
eclsk$pov <- Recode(eclsk$x4povty_i, recodes = "1=1; 2:3=0; -9=NA")

# Check the recoded variables: category counts for the three factors and
# the distribution of the 0/1 poverty indicator. Each call is followed by
# its rendered output; the first level listed is the reference category
# set with relevel().
summary(eclsk$educ)
##  2hsgrad    0Prim  1somehs 3somecol 4colgrad     NA's 
##     2809      661     1010     3979     4493     5222
summary(eclsk$inc)
##       >50k    <15,001 15,001-25k 25,001-35k 35,001-50k       NA's 
##       6534       1702       1871       1313       1532       5222
summary(eclsk$raceth)
##     nh white     hispanic     nh black nh multirace     nh other 
##         6958         2974         1402          218         1342 
##         NA's 
##         5280
# The mean of a 0/1 variable is the (unweighted) proportion coded 1.
summary(eclsk$pov)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.000   0.000   0.267   1.000   1.000    5222

Survey Design

# Analysis sample: rows with no missing value in ANY column of eclsk
# (this includes extracted columns that are not used in the model).
sub <- eclsk %>%
  filter(complete.cases(.))


# survey design
# Strata left with a single PSU after subsetting are variance-adjusted
# instead of raising an error.
options(survey.lonely.psu = "adjust")

# Declare the full design: clusters (PSUs), strata and weights. Using
# ids = ~1 would treat every child as independently sampled within strata
# and ignore the clustering carried by w4pf4psu, which affects standard
# errors and confidence intervals (point estimates do not change).
# nest = TRUE because PSU ids are presumably only unique within a stratum
# -- confirm against the ECLS-K documentation.
# Any rendered model output in this document has to be re-knitted after
# changing the design.
des <- svydesign(
  ids = ~w4pf4psu,
  strata = ~w4pf4str,
  weights = ~w4pf40,
  data = sub,
  nest = TRUE
)

Logit Model

# Survey-weighted logistic regression of the 0/1 poverty indicator on
# parent education, family income band and parent race/ethnicity, fitted
# on the design object `des`. Coefficients are on the log-odds scale and
# are contrasts against the reference levels (2hsgrad, >50k, nh white).
# NOTE(review): with sampling weights, family = binomial triggers the
# "non-integer #successes" warning shown below; the survey package
# documentation suggests family = quasibinomial() to avoid it -- the point
# estimates should be the same, confirm before switching.
# NOTE(review): the income coefficients are very large (about 5 to 12.6
# log-odds). Poverty status is presumably derived from household income,
# so `inc` may nearly determine `pov` (quasi-separation) -- confirm against
# the ECLS-K codebook before interpreting these terms.
fit.logit<-svyglm(pov~educ+inc+raceth, design=des, family=binomial)
## Warning in eval(family$initialize): non-integer #successes in a binomial
## glm!
summary(fit.logit)
## 
## Call:
## svyglm(formula = pov ~ educ + inc + raceth, design = des, family = binomial)
## 
## Survey design:
## svydesign(ids = ~1, strata = ~w4pf4str, weights = ~w4pf40, data = sub)
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -8.4802     1.0313  -8.223 3.01e-16 ***
## educ0Prim            0.9807     0.3715   2.640  0.00834 ** 
## educ1somehs          0.2301     0.3435   0.670  0.50297    
## educ3somecol        -0.1019     0.2699  -0.377  0.70586    
## educ4colgrad        -0.9361     0.3968  -2.359  0.01840 *  
## inc<15,001          12.5594     1.1547  10.876  < 2e-16 ***
## inc15,001-25k        8.9912     1.0214   8.803  < 2e-16 ***
## inc25,001-35k        7.4397     1.0303   7.221 6.64e-13 ***
## inc35,001-50k        4.9933     1.0654   4.687 2.91e-06 ***
## racethhispanic       0.6388     0.2696   2.369  0.01790 *  
## racethnh black       0.3275     0.3151   1.039  0.29876    
## racethnh multirace  -1.3380     0.5457  -2.452  0.01427 *  
## racethnh other       1.5499     0.5263   2.945  0.00326 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 0.8820426)
## 
## Number of Fisher Scoring iterations: 10

Odds Ratios and Confidence Intervals

# Exponentiate the log-odds coefficients and their confidence limits to
# get odds ratios with confidence intervals, then print them as a table.
data.frame(
  OR = exp(coef(fit.logit)),
  ci = exp(confint(fit.logit))
) %>%
  knitr::kable()
OR ci.2.5.. ci.97.5..
(Intercept) 2.075000e-04 2.750000e-05 1.566600e-03
educ0Prim 2.666424e+00 1.287331e+00 5.522915e+00
educ1somehs 1.258778e+00 6.419897e-01 2.468143e+00
educ3somecol 9.031314e-01 5.320916e-01 1.532906e+00
educ4colgrad 3.921429e-01 1.801577e-01 8.535638e-01
inc<15,001 2.847457e+05 2.961700e+04 2.737620e+06
inc15,001-25k 8.031989e+03 1.084945e+03 5.946185e+04
inc25,001-35k 1.702292e+03 2.259468e+02 1.282513e+04
inc35,001-50k 1.474175e+02 1.826782e+01 1.189629e+03
racethhispanic 1.894279e+00 1.116653e+00 3.213436e+00
racethnh black 1.387432e+00 7.482015e-01 2.572794e+00
racethnh multirace 2.623646e-01 9.003360e-02 7.645503e-01
racethnh other 4.711066e+00 1.679341e+00 1.321598e+01

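In this case, holding parent education and race/ethnicity constant, a household that makes 15,001-25k a year has about 8,000 times the odds (OR = 8.03e+03, i.e. roughly 8,032) of being in poverty compared with a household that makes more than $50k a year. Likewise, Hispanic and NH Black parents have 89% and 39% higher odds of being in poverty than NH White parents, respectively, although the confidence interval for NH Black in the table above includes 1. Note that these are odds ratios, so they describe differences in odds rather than how many times "more likely" poverty is.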

Comment: I’m not sure I used the correct strata and weights. The OR for inc 15,001-25k seems impossibly high. I also do not think I interpreted it correctly.