library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ipumsr)
library(haven)
library(survival)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma
library(ggplot2)
library(ggpubr)
library(muhaz)
library(carData)
library(dplyr)
library(eha)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
nhis <- read_ipums_ddi("nhis_00009.xml")
nhis_dat <- read_ipums_micro(nhis)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
nhis_dat <- haven::zap_labels(nhis_dat)
nhis_dat <- nhis_dat %>%
  filter(MORTELIG == 1 & is.na (nhis_dat$RACENEW)==F)

nhis_dat <- nhis_dat %>%
  filter(SEX == 2)

nhis_dat <- nhis_dat%>%
filter(complete.cases(.))
nhis_dat$d.age <- ifelse(nhis_dat$MORTSTAT==1,nhis_dat$MORTDODY-(nhis_dat$YEAR-nhis_dat$AGE),
                         ifelse(nhis_dat$MORTSTAT==2,2017- (nhis_dat$YEAR-nhis_dat$AGE),NA))
nhis_dat$d.event <- ifelse(nhis_dat$MORTSTAT==1,1,0)
nhis_dat$timetodeath <- ifelse(nhis_dat$MORTSTAT==1,
                               nhis_dat$MORTDODY- nhis_dat$YEAR,
                               2017- nhis_dat$YEAR)
nhis_dat$d1yr <- ifelse (nhis_dat$timetodeath <=1 & nhis_dat$MORTSTAT==1,1,0)
nhis_dat$d5yr <- ifelse (nhis_dat$timetodeath <=5 & nhis_dat$MORTSTAT==1,1,0)
nhis_dat$MARST<- as.numeric(nhis_dat$MARST)

nhis_dat$MARST <- car:: Recode(nhis_dat$MARST, recodes= "11:40= 'married/ever married';  50= 'unmarried'; else= NA", as.factor= T)
nhis_dat$mwt<-nhis_dat$MORTWT/mean(nhis_dat$MORTWT, na.rm=T)
nhis_dat$age5<-cut(nhis_dat$AGE,seq(15,85, 5))
nhis_dat$race<-Recode(nhis_dat$RACENEW,
recodes ="10='wht'; 20 ='blk'; 30:61='other'; 97:99=NA",
as.factor=T)
nhis_dat$EDUC <- car:: Recode(nhis_dat$EDUC, recodes= "100:116= 'LHS';  201= 'High School'; 400='Bachelor'; 500:503='Graduate'; else= NA", as.factor= T)
nhis_dat$hisp<-Recode(nhis_dat$HISPYN, recodes="1=0; 2=1; else=NA")

nhis_dat$black<-ifelse(nhis_dat$race=='blk',1,0)
nhis_dat$oth<-ifelse(nhis_dat$race=='other',1,0)
nhis_dat$race_eth[nhis_dat$hisp == 0 & nhis_dat$race=="wht"]<-"NHWhite"                     
## Warning: Unknown or uninitialised column: `race_eth`.
nhis_dat$race_eth[nhis_dat$hisp == 0 & nhis_dat$race=="blk"]<-"NHBlack"
nhis_dat$race_eth[nhis_dat$hisp == 0 & nhis_dat$race=="other"]<-"NHother"
nhis_dat$race_eth[nhis_dat$hisp == 1 ]<-"Hispanic"
nhis_dat$race_eth[is.na(nhis_dat$hisp) ==T | is.na(nhis_dat$race)==T]<-NA                    


library(forcats)
nhis_dat$race_eth<-fct_relevel(nhis_dat$race_eth, c("NHWhite", "NHBlack" , "Hispanic", "NHother"))
## Warning: 3 unknown levels in `f`: NHWhite, NHBlack, and NHother

#Create person - period file :

subpp<-survSplit(Surv(d.age, d.event)~., data=nhis_dat,
cut=seq(18, 100, 5), episode="mort_5_yr")

#Analysis of death outcome:

des2<-svydesign(ids=~PSU, strata=~STRATA,
weights = ~MORTWTSA, data=subpp, nest=T)

constant model:

m0<-svyglm(d.event~ MARST+ EDUC,
design=des2, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

#general model:

m1<-svyglm(d.event~factor(mort_5_yr) + MARST+ EDUC,
design=des2, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

#linear model:

m2<-svyglm(d.event~mort_5_yr+MARST+EDUC, design=des2,
family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

#quadratic model:

m3<-svyglm(d.event~mort_5_yr+ I(mort_5_yr^2)+MARST+EDUC,
design=des2, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

#spline model:

library(splines)
m4<-svyglm(d.event~ns(mort_5_yr, df=3)+MARST+EDUC, design=des2,
family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Generating hazard plots:

newdat<-expand.grid(mort_5_yr = unique(subpp$mort_5_yr),
MARST= unique(subpp$MARST),
EDUC = unique(subpp$EDUC))
newdat<-newdat%>%
dplyr::filter(complete.cases(.))
newdat$h0<-predict(m0, newdata=newdat, type="response")
newdat$h1<-predict(m1, newdata=newdat, type="response")
newdat$h2<-predict(m2, newdata=newdat, type="response")
newdat$h3<-predict(m3, newdata=newdat, type="response")
newdat$h4<-predict(m4, newdata=newdat, type="response")
head(newdat)
##   mort_5_yr     MARST EDUC         h0           h1           h2           h3
## 1         1 unmarried  LHS 0.01366566 4.252137e-09 0.0001515967 0.0003056354
## 2         2 unmarried  LHS 0.01366566 3.170265e-04 0.0002739518 0.0004811518
## 3         3 unmarried  LHS 0.01366566 6.231514e-04 0.0004950366 0.0007675995
## 4         4 unmarried  LHS 0.01366566 1.481012e-03 0.0008944615 0.0012409231
## 5         5 unmarried  LHS 0.01366566 2.211903e-03 0.0016159055 0.0020327502
## 6         6 unmarried  LHS 0.01366566 3.312365e-03 0.0029183926 0.0033736637
##             h4
## 1 6.502085e-05
## 2 1.058818e-04
## 3 1.718903e-04
## 4 2.773363e-04
## 5 4.433532e-04
## 6 7.000697e-04
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
out<-melt(setDT(newdat),
id = c("mort_5_yr", "MARST", "EDUC"),
measure.vars = list(haz=c("h0", "h1","h2","h3", "h4")))
head(out, n=20)
##     mort_5_yr                MARST EDUC variable      value
##  1:         1            unmarried  LHS       h0 0.01366566
##  2:         2            unmarried  LHS       h0 0.01366566
##  3:         3            unmarried  LHS       h0 0.01366566
##  4:         4            unmarried  LHS       h0 0.01366566
##  5:         5            unmarried  LHS       h0 0.01366566
##  6:         6            unmarried  LHS       h0 0.01366566
##  7:         7            unmarried  LHS       h0 0.01366566
##  8:         8            unmarried  LHS       h0 0.01366566
##  9:         9            unmarried  LHS       h0 0.01366566
## 10:        10            unmarried  LHS       h0 0.01366566
## 11:        11            unmarried  LHS       h0 0.01366566
## 12:        12            unmarried  LHS       h0 0.01366566
## 13:        13            unmarried  LHS       h0 0.01366566
## 14:        14            unmarried  LHS       h0 0.01366566
## 15:        15            unmarried  LHS       h0 0.01366566
## 16:        16            unmarried  LHS       h0 0.01366566
## 17:        17            unmarried  LHS       h0 0.01366566
## 18:        18            unmarried  LHS       h0 0.01366566
## 19:         1 married/ever married  LHS       h0 0.02628689
## 20:         2 married/ever married  LHS       h0 0.02628689
library(ggplot2)

out%>%
dplyr::filter(EDUC == "High School")%>%
dplyr::mutate(mod = dplyr::case_when(.$variable =="h0" ~ "Constant",
.$variable =="h1" ~ "General",
.$variable =="h2" ~ "Linear",
.$variable =="h3" ~ "Polynomial - 2",
.$variable =="h4" ~ "Spline"))%>%
ggplot(aes(x = mort_5_yr*5, y=value ))+
geom_line(aes(group=MARST, color=MARST) )+
labs(title = "Hazard function for adult mortality",
subtitle = "Alternative model specifications")+
xlab("Age")+ylab("S(t)")+
facet_wrap(~mod)#+ scale_y_continuous(trans = "log10")
## Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.

AIC0<-AIC(m0)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC1<-AIC(m1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2<-AIC(m2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC3<-AIC(m3)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC4<-AIC(m4)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS<-data.frame(AIC = c(AIC0["AIC"],AIC1["AIC"],AIC2["AIC"],AIC3["AIC"],AIC4["AIC"]),
mod = factor(c("const", "general", "linear", "poly 2", "spline")) )
AICS$mod<-forcats::fct_relevel(AICS$mod,
c("general", "const" , "linear", "poly2", "spline"))
## Warning: 1 unknown level in `f`: poly2
AICS$deltaAIC<-AICS$AIC - AICS$AIC[AICS$mod=="general"]
knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")],
caption = "Relative AIC for alternative time specifications")
Relative AIC for alternative time specifications
mod AIC deltaAIC
const 511807.7 193054.321
general 318753.4 0.000
linear 320806.6 2053.194
poly 2 320534.0 1780.659
spline 320322.4 1569.085

Here,we see that nothing touches the AIC for the general model in this case.