Homework 5c

Author

Jules Gonzalez

Discrete Time Models

1.Fit the discrete time hazard model to your outcome

a)You must form a person-period data set

b)Consider both the general model and other time specifications

c)Include all main effects in the model

d)Test for an interaction between at least two of the predictors e)Generate hazard plots for interesting cases highlighting the significant predictors in your analysis

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(haven)
library(survival)
library(car)
Loading required package: carData

Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some
library(survey)
Loading required package: grid
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'survey'

The following object is masked from 'package:graphics':

    dotchart
library(muhaz)
library(eha)
library(ipumsr)

ddi <- read_ipums_ddi("nhis_00008.xml")
alc <- read_ipums_micro(ddi)
Use of data from IPUMS NHIS is subject to conditions including that users
should cite the data appropriately. Use command `ipums_conditions()` for more
details.
alc<-haven::zap_labels(alc)

nams<-names(alc)

newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(alc)<-newnames
library(dplyr)

#rent or own 
alc$home<-Recode(alc$ownership, recodes="10:12='own'; 20='rent'; else=NA", as.factor=T)

#us citizen
alc$us<-Recode(alc$citizen, recodes="1='nous'; 2='yesus'; else=NA", as.factor=T)
summary(alc$us)
  nous  yesus   NA's 
 94603 890065   8119 
#marital status
alc$marital<-car::Recode(as.numeric(alc$marst),recodes = "10:13='married'; 20='widowed'; 30='divorced'; 40='seperated';50='never married';else=NA")

 #sex                                    
alc$sex<-Recode(alc$sex, recodes="1='male'; 2='female'; else=NA", as.factor=T)

#smoker status
alc$smoke<-car::Recode(as.numeric(alc$smokestatus1),recodes = "10='never smoked'; 20='current smoker'; 30='former smoker';else=NA")

#asthma_ER
alc$aer<-car::Recode(as.numeric(alc$astheryr),recodes = "1='no er'; 2='yes er';else=NA")

#asthma attack
alc$aattack<-car::Recode(as.numeric(alc$asthatakyr),recodes = "1='no'; 2='yes';else=NA")

#poverty level
alc$povl<-car::Recode(as.numeric(alc$pooryn),recodes = "1='at or above povl'; 2='below povl';else=NA")

#education
alc$edu<-car::Recode(as.numeric(alc$educrec1),recodes = "01:13='lths'; 14='somecollege'; 15='bachelors';16='five+ yrs of college';else=NA")

#ever had lung cancer
alc$lung<-car::Recode(as.numeric(alc$cnlung),recodes = "0:1=0; 2=1;else=NA")

alc$aev<-car::Recode(as.numeric(alc$asthmaev),recodes = "0:1=0; 2=1;else=NA")


#did they die?
alc$d.event<-ifelse(alc$mortstat==1,1,0) 

#time of death post interview
alc$timetodeath<-ifelse(alc$mortstat ==1,
alc$mortdody-alc$year,
2002 - alc$year )

#die 1 yr or 5 yrs 
alc$d1yr<-ifelse(alc$timetodeath<=1&alc$mortstat==1, 1,0) 
alc$d5yr<-ifelse(alc$timetodeath<=5&alc$mortstat==1, 1,0)

#age
alc$age5<-cut(alc$age,seq(15,85, 5))

#coding age at death
alc$d.age<-ifelse(alc$mortstat==1,alc$mortdody-(alc$year-alc$age) ,
ifelse(alc$mortstat==2,2002-(alc$year-alc$age), NA))

#race
alc$race<-Recode(alc$racenew,
recodes ="100='wht'; 200 ='blk'; 300:542='other'; 997:999=NA",
as.factor=T)

alc <- alc %>%
  filter(mortelig == 1, mortdody<9999, is.na(race)==F, is.na(edu)==F)
alc <- alc %>%
  mutate(death_time = ifelse( mortelig ==1, 
                             mortdody - year , 
                             2002:2012 - year ), 
         d.event = ifelse(mortelig == 1 & death_time <= 5, 1, 0))

library(survival)

time_fit <- survfit(Surv(death_time, d.event) ~ 1, 
                  conf.type = "log",
                   data = alc)

library(ggsurvfit)

time_fit %>%
  ggsurvfit()+
  xlim(-1, 6) +
  add_censor_mark() +
  add_confidence_interval()+
  add_quantile(y_value = .975)
Warning: Removed 11 row(s) containing missing values (geom_path).

Person-Period File

p2<-survSplit(Surv(d.age, d.event)~., data=alc,
cut=seq(18, 100, 5), episode="mort_5_yr")
des5<-svydesign(ids=~psu, strata=~strata,
weights = ~mortwtsa, data=p2, nest=T)

Constant Model

constant<-svyglm(d.event~ race+edu,
design=des5, family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

General Model

general<-svyglm(d.event~factor(mort_5_yr) + race+edu,
design=des5, family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Linear Model

linear<-svyglm(d.event~mort_5_yr+race+edu, design=des5,
family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Quadratic Model

quad<-svyglm(d.event~mort_5_yr+ I(mort_5_yr^2)+race+edu,
design=des5, family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Spline Model

library(splines)
spline<-svyglm(d.event~ns(mort_5_yr, df=3)+race+edu, design=des5,
family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Hazard Plots

gen_haz<-expand.grid(mort_5_yr = unique(p2$mort_5_yr),
race= unique(p2$race),
edu = unique(p2$edu))

gen_haz<-gen_haz%>%
dplyr::filter(complete.cases(.))

gen_haz$h0<-predict(constant, newdata=gen_haz, type="response")
gen_haz$h1<-predict(general, newdata=gen_haz, type="response")
gen_haz$h2<-predict(linear, newdata=gen_haz, type="response")
gen_haz$h3<-predict(quad, newdata=gen_haz, type="response")
gen_haz$h4<-predict(spline, newdata=gen_haz, type="response")

head(gen_haz)
  mort_5_yr race  edu         h0           h1          h2           h3
1         1  wht lths 0.03169028 0.0001841806 0.001265208 0.0008770999
2         2  wht lths 0.03169028 0.0017686644 0.001806214 0.0013567372
3         3  wht lths 0.03169028 0.0027734559 0.002578257 0.0020804929
4         4  wht lths 0.03169028 0.0038814303 0.003679691 0.0031625370
5         5  wht lths 0.03169028 0.0039379807 0.005250419 0.0047650592
6         6  wht lths 0.03169028 0.0069265518 0.007489110 0.0071156426
            h4
1 0.0008542499
2 0.0011473154
3 0.0015442655
4 0.0020876464
5 0.0028407811
6 0.0038994858
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:purrr':

    transpose
library(magrittr)

Attaching package: 'magrittr'
The following object is masked from 'package:purrr':

    set_names
The following object is masked from 'package:tidyr':

    extract
fin<-melt(setDT(gen_haz),
id = c("mort_5_yr", "race", "edu"),
measure.vars = list(haz=c("h0", "h1","h2","h3", "h4")))
head(fin, n=20)
    mort_5_yr race  edu variable      value
 1:         1  wht lths       h0 0.03169028
 2:         2  wht lths       h0 0.03169028
 3:         3  wht lths       h0 0.03169028
 4:         4  wht lths       h0 0.03169028
 5:         5  wht lths       h0 0.03169028
 6:         6  wht lths       h0 0.03169028
 7:         7  wht lths       h0 0.03169028
 8:         8  wht lths       h0 0.03169028
 9:         9  wht lths       h0 0.03169028
10:        10  wht lths       h0 0.03169028
11:        11  wht lths       h0 0.03169028
12:        12  wht lths       h0 0.03169028
13:        13  wht lths       h0 0.03169028
14:        14  wht lths       h0 0.03169028
15:        15  wht lths       h0 0.03169028
16:        16  wht lths       h0 0.03169028
17:        17  wht lths       h0 0.03169028
18:        18  wht lths       h0 0.03169028
19:         1  blk lths       h0 0.03655862
20:         2  blk lths       h0 0.03655862
library(ggplot2)
fin%>%
dplyr::filter(edu == "bachelors")%>%
dplyr::mutate(mod = dplyr::case_when(.$variable =="h0" ~ "Constant",
.$variable =="h1" ~ "General",
.$variable =="h2" ~ "Linear",
.$variable =="h3" ~ "Polynomial - 2",
.$variable =="h4" ~ "Spline"))%>%
ggplot(aes(x = mort_5_yr*5, y=value ))+
geom_line(aes(group=race, color=race) )+
labs(title = "Hazard function for adult mortality",
subtitle = "Alternative model specifications")+
xlab("Age")+ylab("S(t)")+facet_wrap(~mod, scales= "free_y")
Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.

When looking at adult mortality hazard based on education, specifically individuals with a Bachelor’s degree, the risk hazard is higher for the non-Hispanic Blacks compared to all alternative models .

Model Fits

AICc<-AIC(constant)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICg<-AIC(general)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICl<-AIC(linear)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICq<-AIC(quad)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICs<-AIC(spline)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS<-data.frame(AIC = c(AICc["AIC"],AICg["AIC"],AICl["AIC"],AICq["AIC"],AICs["AIC"]),
mod = factor(c("const", "general", "linear", "poly 2", "spline")) )
AICS$mod<-forcats::fct_relevel(AICS$mod,
c("general", "const" , "linear", "poly2", "spline"))
Warning: 1 unknown level in `f`: poly2
AICS$deltaAIC<-AICS$AIC - AICS$AIC[AICS$mod=="general"]
knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")],
caption = "Relative AIC for alternative time specifications")
Relative AIC for alternative time specifications
mod AIC deltaAIC
const 283941.9 58666.495
general 225275.4 0.000
linear 229828.3 4552.898
poly 2 229739.2 4463.819
spline 229780.8 4505.426

None of the models are close to the AIC for the general model for this analysis.