Stats HW3

library(car)

## Loading required package: carData

library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(survey)

## Loading required package: grid

## Loading required package: Matrix

## Loading required package: survival

## 
## Attaching package: 'survey'

## The following object is masked from 'package:graphics':
## 
##     dotchart

library(questionr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following object is masked from 'package:car':
## 
##     recode

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyverse)

## -- Attaching packages ------------------------------------------------ tidyverse 1.3.0 --

## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.3     v stringr 1.4.0
## v tidyr   1.1.1     v forcats 0.5.0
## v readr   1.3.1

## -- Conflicts --------------------------------------------------- tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x tidyr::pack()   masks Matrix::pack()
## x dplyr::recode() masks car::recode()
## x purrr::some()   masks car::some()
## x tidyr::unpack() masks Matrix::unpack()

Research Question:

Is increased educational attainment associated with reduced odds of poverty? Also, is this relationship influenced by race/ethnicity?

Load data

library(foreign)
nhanes_18 <- read.xport("C:/Users/maman/OneDrive/DEM Spring 2021/DEM 7283 Stats 2/DEMO_J.XPT", 
    NULL)

nams<-names(nhanes_18)
head(nams, n=10)

##  [1] "SEQN"     "SDDSRVYR" "RIDSTATR" "RIAGENDR" "RIDAGEYR" "RIDAGEMN"
##  [7] "RIDRETH1" "RIDRETH3" "RIDEXMON" "RIDEXAGM"

newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(nhanes_18)<-newnames

Recode variables

#sex
nhanes_18$sex<-as.factor(ifelse(nhanes_18$riagendr==1,
                                "Male",
                                "Female"))
#race/ethnicity
nhanes_18$hispanic <- Recode(nhanes_18$ridreth3, recodes = "1=1; 2=1; else=0")
nhanes_18$black<-Recode(nhanes_18$ridreth3, recodes="4=1; else=0")
nhanes_18$white<-Recode(nhanes_18$ridreth3, recodes="3=1; else=0")
nhanes_18$asian<-Recode(nhanes_18$ridreth3, recodes="6=1; else=0")
nhanes_18$other<-Recode(nhanes_18$ridreth3, recodes="7=1; else=0")

# at or below poverty threshold using income to poverty ratio
nhanes_18$pov<-ifelse(nhanes_18$indfmpir <= 1, "at or below poverty", 
                      ifelse(nhanes_18$indfmpir > 1, "above poverty", NA))

# education
nhanes_18$educ <- Recode(nhanes_18$dmdeduc2, recodes="1:2='1 lths'; 3='2 hs'; 4='3 some col'; 5='4 col grad'; 7:9=NA; else=NA", as.factor=T)

# marital status
nhanes_18$marst <- Recode(nhanes_18$dmdmartl, recodes="1='1 married'; 2='2 widowed'; 3='3 divorced'; 4='4 separated'; 5='5 nm'; 6='6 cohab'; else=NA", as.factor=T)

# age
nhanes_18$agec<-cut(nhanes_18$ridageyr,
                   breaks=c(0,17,65,80))

nhanes_18$pov2<- Recode(nhanes_18$pov, recodes= "'at or below poverty'=1; 'above poverty'=0", as.factor=T)
nhanes_18$race_eth<- Recode(nhanes_18$ridreth3, recodes= "3= '1 white'; 1:2='2 hispanic'; 4= '3 black'; 6= '4 asian'; 7= '5 other' ", as.factor=T )

Analysis

sub<-nhanes_18 %>%
  select(pov, pov2, agec,sex, marst, race_eth, educ, white, black, hispanic, asian, other, wtint2yr, sdmvpsu, sdmvstra) %>%
  filter( complete.cases( . ))

#First we tell R our survey design
options(survey.lonely.psu = "adjust")
des<-svydesign(ids= ~sdmvpsu,
               strata= ~sdmvstra,
               weights= ~wtint2yr
               , data = sub,
               nest = TRUE)

Chi-square test of independence

svychisq(~pov2+educ,
         design = des)

## 
##  Pearson's X^2: Rao & Scott adjustment
## 
## data:  svychisq(~pov2 + educ, design = des)
## F = 50.189, ndf = 2.1222, ddf = 31.8328, p-value = 8.43e-11

Plotting estimates with standard errors

cat<-svyby(formula = ~pov2,
           by = ~educ,
           design = des,
           FUN = svymean,
           na.rm=T)

# not sure why this code is not working
#cat%>%
  #ggplot()+
  #geom_point(aes(x=educ,y=pov2))+
  #geom_errorbar(aes(x=educ, ymin = pov2-1.96*se, 
                    #ymax= pov2+1.96*se),
                #width=.25)+
   #labs(title = "Percent % of US Adults Below Poverty Line by Education", 
        #caption = "Source: CDC BRFSS - NHANES Data 2017-18, Calculations by Daniel Mamani, M.S",
       #x = "Education",
       #y = "% Poverty")+
  #theme_minimal()

Logistic Regression model

#Logit model
fit.logit<-svyglm(pov2 ~ agec + race_eth + educ + marst ,
                  design = des,
                  family = binomial)

## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

library(broom)
fit.logit%>%
  tidy()%>%
  knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value
(Intercept)	-1.446	0.209	-6.907	0.020
agec(65,80]	-0.647	0.144	-4.502	0.046
race_eth2 hispanic	0.476	0.162	2.937	0.099
race_eth3 black	0.651	0.175	3.709	0.066
race_eth4 asian	0.563	0.265	2.121	0.168
race_eth5 other	0.626	0.198	3.156	0.087
educ2 hs	-0.687	0.150	-4.585	0.044
educ3 some col	-1.350	0.205	-6.584	0.022
educ4 col grad	-2.677	0.219	-12.202	0.007
marst2 widowed	0.676	0.232	2.918	0.100
marst3 divorced	0.963	0.130	7.437	0.018
marst4 separated	1.410	0.303	4.659	0.043
marst5 nm	0.905	0.142	6.387	0.024
marst6 cohab	0.903	0.220	4.098	0.055

Add OR’s and CI’s

fit.logit%>%
  tidy()%>%
  mutate(OR = exp(estimate),
         LowerOR_Ci = exp(estimate - 1.96*std.error),
         UpperOR_Ci = exp(estimate + 1.96*std.error))%>%
  knitr::kable(digits = 3)

term	estimate	std.error	statistic	p.value	OR	LowerOR_Ci	UpperOR_Ci
(Intercept)	-1.446	0.209	-6.907	0.020	0.236	0.156	0.355
agec(65,80]	-0.647	0.144	-4.502	0.046	0.523	0.395	0.694
race_eth2 hispanic	0.476	0.162	2.937	0.099	1.610	1.172	2.212
race_eth3 black	0.651	0.175	3.709	0.066	1.917	1.359	2.703
race_eth4 asian	0.563	0.265	2.121	0.168	1.755	1.044	2.952
race_eth5 other	0.626	0.198	3.156	0.087	1.869	1.268	2.757
educ2 hs	-0.687	0.150	-4.585	0.044	0.503	0.375	0.675
educ3 some col	-1.350	0.205	-6.584	0.022	0.259	0.173	0.387
educ4 col grad	-2.677	0.219	-12.202	0.007	0.069	0.045	0.106
marst2 widowed	0.676	0.232	2.918	0.100	1.966	1.249	3.097
marst3 divorced	0.963	0.130	7.437	0.018	2.621	2.033	3.378
marst4 separated	1.410	0.303	4.659	0.043	4.095	2.263	7.410
marst5 nm	0.905	0.142	6.387	0.024	2.472	1.872	3.263
marst6 cohab	0.903	0.220	4.098	0.055	2.467	1.602	3.800

Predicted probabilities for interesting cases

#get a series of predicted probabilites for different "types" of people for each model
#ref_grid will generate all possible combinations of predictors from a model

library(emmeans)
rg<-ref_grid(fit.logit, data=nhanes_18)

marg_logit<-emmeans(object = rg,
              specs = c( "race_eth",  "educ"),
              type="response" )

knitr::kable(marg_logit,  digits = 4)

race_eth	educ	prob	SE	df	asymp.LCL	asymp.UCL
1 white	1 lths	0.2769	0.0289	Inf	0.2240	0.3370
2 hispanic	1 lths	0.3814	0.0360	Inf	0.3137	0.4541
3 black	1 lths	0.4233	0.0246	Inf	0.3760	0.4721
4 asian	1 lths	0.4020	0.0518	Inf	0.3059	0.5063
5 other	1 lths	0.4173	0.0487	Inf	0.3259	0.5147
1 white	2 hs	0.1615	0.0196	Inf	0.1267	0.2036
2 hispanic	2 hs	0.2367	0.0364	Inf	0.1728	0.3152
3 black	2 hs	0.2696	0.0378	Inf	0.2022	0.3496
4 asian	2 hs	0.2527	0.0400	Inf	0.1824	0.3387
5 other	2 hs	0.2647	0.0398	Inf	0.1943	0.3496
1 white	3 some col	0.0903	0.0120	Inf	0.0694	0.1166
2 hispanic	3 some col	0.1378	0.0240	Inf	0.0971	0.1918
3 black	3 some col	0.1598	0.0272	Inf	0.1134	0.2205
4 asian	3 some col	0.1484	0.0363	Inf	0.0903	0.2341
5 other	3 some col	0.1565	0.0336	Inf	0.1013	0.2340
1 white	4 col grad	0.0257	0.0051	Inf	0.0174	0.0378
2 hispanic	4 col grad	0.0407	0.0068	Inf	0.0292	0.0564
3 black	4 col grad	0.0481	0.0113	Inf	0.0302	0.0756
4 asian	4 col grad	0.0442	0.0129	Inf	0.0248	0.0775
5 other	4 col grad	0.0469	0.0103	Inf	0.0304	0.0718

comps<-as.data.frame(marg_logit)

comps[comps$race_eth=="1 white" & comps$educ == "4 col grad" , ]

##    race_eth       educ       prob          SE  df  asymp.LCL  asymp.UCL
## 16  1 white 4 col grad 0.02567053 0.005101928 Inf 0.01735777 0.03781114

comps[comps$race_eth=="3 black" & comps$educ == "4 col grad" , ]

##    race_eth       educ       prob         SE  df  asymp.LCL  asymp.UCL
## 18  3 black 4 col grad 0.04806878 0.01126543 Inf 0.03022505 0.07562529

comps[comps$race_eth=="1 white" & comps$educ == "1 lths" , ]

##   race_eth   educ      prob         SE  df asymp.LCL asymp.UCL
## 1  1 white 1 lths 0.2769462 0.02891267 Inf 0.2239751 0.3370048

comps[comps$race_eth=="3 black" & comps$educ == "1 lths" , ]

##   race_eth   educ     prob         SE  df asymp.LCL asymp.UCL
## 3  3 black 1 lths 0.423331 0.02459793 Inf 0.3759905 0.4721223

Conclusion

# The logistic regression model provides evidence of a negative relationship between educational attaiment and the odds of being in poerty. The predicted probabilities suggest that race influences this relationship given that Black individuals are roughly twice as likely to live below the poverty line than whites despite having the same education levels of education.

Stats HW3

Daniel Mamani

2/13/2021

Research Question:

Load data

Recode variables

Analysis

Plotting estimates with standard errors

Logistic Regression model

Predicted probabilities for interesting cases

Conclusion