hw5

Author

Drew Schaefer

In this analysis I use data from National Health Interview Survey (NHIS) linked to the National Death Index (NDI). I use a 10-year sample from 2009-2018. The mortality link up includes deaths through December 31, 2019. The outcome variable that I am interested in is death.

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6     ✔ purrr   0.3.4
✔ tibble  3.1.8     ✔ dplyr   1.0.9
✔ tidyr   1.2.0     ✔ stringr 1.4.1
✔ readr   2.1.2     ✔ forcats 0.5.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Loading required package: grid

Loading required package: Matrix


Attaching package: 'Matrix'


The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Loading required package: survival


Attaching package: 'survey'


The following object is masked from 'package:graphics':

    dotchart


Loading required package: carData


Attaching package: 'car'


The following object is masked from 'package:dplyr':

    recode


The following object is masked from 'package:purrr':

    some


#BlackLivesMatter


Attaching package: 'psych'


The following object is masked from 'package:ggsurvfit':

    %+%


The following object is masked from 'package:car':

    logit


The following objects are masked from 'package:ggplot2':

    %+%, alpha
nhis <- read_ipums_ddi("nhis_00007.xml")
nhis_dat <- read_ipums_micro(nhis)
Use of data from IPUMS NHIS is subject to conditions including that users
should cite the data appropriately. Use command `ipums_conditions()` for more
details.
nhis_dat <- haven::zap_labels(nhis_dat)
nhis_dat <- nhis_dat %>%
  filter(MORTELIG == 1)

nhis_dat <- nhis_dat %>%
  mutate(death_age = ifelse( MORTSTAT ==1, 
                             MORTDODY - (YEAR - AGE), 
                             2019 - (YEAR - AGE)), 
         d.event = ifelse(MORTSTAT == 1, 1, 0))

nhis_dat$married <- Recode(nhis_dat$MARSTAT, recodes="00=NA; 10:13='married'; 20:40='sep'; 50='nm'; 99=NA" ,as.factor=T)

nhis_dat$male <- if_else(nhis_dat$SEX==1,1,0)

nhis_dat$age5 <- cut(nhis_dat$AGE,seq(15,85,5))

nhis_dat <- nhis_dat %>% mutate(age2 = AGE * AGE)

nhis_dat$race <- Recode(nhis_dat$RACENEW, recodes="100='wht'; 200 ='blk'; 300:617='other'; 900:990=NA", as.factor=T)

nhis_dat$college <- Recode(nhis_dat$EDUC, recodes= "000=NA; 100:202='hs or less';300:303='some coll';400:504='coll';else=NA", as.factor=T)

nhis_dat$black <- if_else(nhis_dat$race=='blk',1,0)

nhis_dat$other <- if_else(nhis_dat$race=='other',1,0)

nhis_dat$hs <- if_else(nhis_dat$college=='hs or less',1,0)

nhis_dat$col1 <- if_else(nhis_dat$college=='some coll',1,0)

nhis_dat$sep <- if_else(nhis_dat$married=='sep',1,0)

nhis_dat$nm <- if_else(nhis_dat$married=='nm',1,0)
subpp <- survSplit(Surv(death_age, d.event)~ ., data = nhis_dat,
                   cut = seq(18,100,5), episode = "mort_5_yr")
des2 <- svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA,
                  data = subpp, nest = T)
#constant model
m0 <- svyglm(d.event ~ married+college,
             design = des2, family = binomial (link = "cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#general model
m1 <- svyglm(d.event ~factor(mort_5_yr)+married+college,
             design = des2, family = binomial (link = "cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#linear model
m2 <- svyglm(d.event~mort_5_yr+married+college,
             design = des2, family = binomial (link = "cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#quadratic model
m3 <- svyglm(d.event~mort_5_yr+I(mort_5_yr^2)+married+college,
             design = des2, family = binomial (link = "cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
newdat <- expand.grid(mort_5_yr = unique(subpp$mort_5_yr),
                      married = unique(subpp$married),
                      college = unique(subpp$college))
newdat <- newdat %>%
  filter(complete.cases(.))

newdat$h0<-predict(m0, newdata=newdat, type="response")
newdat$h1<-predict(m1, newdata=newdat, type="response")
newdat$h2<-predict(m2, newdata=newdat, type="response")
newdat$h3<-predict(m3, newdata=newdat, type="response")


head(newdat)
  mort_5_yr married   college          h0           h1           h2
1         1      nm some coll 0.003949921 2.998418e-05 0.0001395741
2         2      nm some coll 0.003949921 3.518150e-04 0.0002496623
3         3      nm some coll 0.003949921 9.781186e-04 0.0004465624
4         4      nm some coll 0.003949921 1.225002e-03 0.0007986888
5         5      nm some coll 0.003949921 1.780563e-03 0.0014282779
6         6      nm some coll 0.003949921 2.865865e-03 0.0025535239
            h3
1 0.0003353027
2 0.0004940912
3 0.0007432397
4 0.0011412739
5 0.0017888258
6 0.0028617065
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:purrr':

    transpose
library(magrittr)

Attaching package: 'magrittr'
The following object is masked from 'package:purrr':

    set_names
The following object is masked from 'package:tidyr':

    extract
out<-melt(setDT(newdat),
          id = c("mort_5_yr", "married", "college"),
          measure.vars = list(haz=c("h0", "h1","h2","h3")))
head(out, n=20)
    mort_5_yr married   college variable       value
 1:         1      nm some coll       h0 0.003949921
 2:         2      nm some coll       h0 0.003949921
 3:         3      nm some coll       h0 0.003949921
 4:         4      nm some coll       h0 0.003949921
 5:         5      nm some coll       h0 0.003949921
 6:         6      nm some coll       h0 0.003949921
 7:         7      nm some coll       h0 0.003949921
 8:         8      nm some coll       h0 0.003949921
 9:         9      nm some coll       h0 0.003949921
10:        10      nm some coll       h0 0.003949921
11:        11      nm some coll       h0 0.003949921
12:        12      nm some coll       h0 0.003949921
13:        13      nm some coll       h0 0.003949921
14:        14      nm some coll       h0 0.003949921
15:        15      nm some coll       h0 0.003949921
16:        16      nm some coll       h0 0.003949921
17:        17      nm some coll       h0 0.003949921
18:         1 married some coll       h0 0.005188841
19:         2 married some coll       h0 0.005188841
20:         3 married some coll       h0 0.005188841
out%>%
  dplyr::filter(college == "coll")%>%
  dplyr::mutate(mod = dplyr::case_when(.$variable =="h0" ~ "Constant",
                          .$variable =="h1" ~ "General",
                          .$variable =="h2" ~ "Linear",
                          .$variable =="h3" ~ "Polynomial - 2"))%>%
  ggplot(aes(x = mort_5_yr*5, y=value ))+
  geom_line(aes(group=married, color=married) )+
  labs(title = "Hazard function for adult mortality",
       subtitle = "Alternative model specifications")+
  xlab("Age")+ylab("S(t)")+
  facet_wrap(~mod)#+ scale_y_continuous(trans = "log10")
Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.

The non-married individuals have the highest hazard functions of all marital statuses among adults with college education.

AIC0 <- AIC(m0)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC1 <- AIC(m1)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2 <- AIC(m2)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC3 <- AIC(m3)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS<-data.frame(AIC = c(AIC0["AIC"],AIC1["AIC"],AIC2["AIC"],AIC3["AIC"]),
                   mod = factor(c("const", "general", "linear", "poly 2")) )

AICS$mod<-forcats::fct_relevel(AICS$mod,
                                 c("general", "const" , "linear", "poly2"))
Warning: 1 unknown level in `f`: poly2
AICS$deltaAIC<-AICS$AIC - AICS$AIC[AICS$mod=="general"]
  knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")],
caption = "Relative AIC for alternative time specifications")
Relative AIC for alternative time specifications
mod AIC deltaAIC
const 389990.5 122490.750
general 267499.7 0.000
linear 269624.0 2124.340
poly 2 269119.8 1620.133

The general model is clearly the best.

interactmodel <- glm(d.event ~ married * college,
                    data=nhis_dat)

summary(interactmodel)

Call:
glm(formula = d.event ~ married * college, data = nhis_dat)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-0.17729  -0.08277  -0.05198  -0.03523   0.98078  

Coefficients:
                               Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   0.0352322  0.0008133  43.321  < 2e-16 ***
marriednm                    -0.0160126  0.0016218  -9.873  < 2e-16 ***
marriedsep                    0.0582433  0.0018213  31.980  < 2e-16 ***
collegehs or less             0.0475367  0.0010976  43.308  < 2e-16 ***
collegesome coll              0.0167462  0.0012052  13.895  < 2e-16 ***
marriednm:collegehs or less  -0.0293673  0.0020372 -14.415  < 2e-16 ***
marriedsep:collegehs or less  0.0362769  0.0022183  16.353  < 2e-16 ***
marriednm:collegesome coll   -0.0167112  0.0021587  -7.741 9.85e-15 ***
marriedsep:collegesome coll  -0.0019005  0.0024138  -0.787    0.431    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for gaussian family taken to be 0.0610511)

    Null deviance: 34164  on 540719  degrees of freedom
Residual deviance: 33011  on 540711  degrees of freedom
  (7720 observations deleted due to missingness)
AIC: 22631

Number of Fisher Scoring iterations: 2

There is interaction between marital status and education