Homework 5d

Author

Jules Gonzalez

Discrete Time Models

1.Fit the discrete time hazard model to your outcome

a)You must form a person-period data set

b)Consider both the general model and other time specifications

c)Include all main effects in the model

d)Test for an interaction between at least two of the predictors e)Generate hazard plots for interesting cases highlighting the significant predictors in your analysis

library(tidyverse)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.1      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
library(haven)
library(survival)
library(car)
Loading required package: carData

Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some
library(survey)
Loading required package: grid
Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'survey'

The following object is masked from 'package:graphics':

    dotchart
library(muhaz)
library(eha)
library(ipumsr)

ddi <- read_ipums_ddi("nhis_00008.xml")
alc <- read_ipums_micro(ddi)
Use of data from IPUMS NHIS is subject to conditions including that users
should cite the data appropriately. Use command `ipums_conditions()` for more
details.
alc<-haven::zap_labels(alc)

nams<-names(alc)

newnames<-tolower(gsub(pattern = "_",replacement =  "",x =  nams))
names(alc)<-newnames
library(dplyr)

#rent or own 
alc$home<-Recode(alc$ownership, recodes="10:12='own'; 20='rent'; else=NA", as.factor=T)

#us citizen
alc$us<-Recode(alc$citizen, recodes="1='nous'; 2='yesus'; else=NA", as.factor=T)
summary(alc$us)
  nous  yesus   NA's 
 94603 890065   8119 
#marital status
alc$marital<-car::Recode(as.numeric(alc$marst),recodes = "10:13='married'; 20='widowed'; 30='divorced'; 40='seperated';50='never married';else=NA")

 #sex                                    
alc$sex<-Recode(alc$sex, recodes="1='male'; 2='female'; else=NA", as.factor=T)

#smoker status
alc$smoke<-car::Recode(as.numeric(alc$smokestatus1),recodes = "10='never smoked'; 20='current smoker'; 30='former smoker';else=NA")

#asthma_ER
alc$aer<-car::Recode(as.numeric(alc$astheryr),recodes = "1='no er'; 2='yes er';else=NA")

#asthma attack
alc$aattack<-car::Recode(as.numeric(alc$asthatakyr),recodes = "1='no'; 2='yes';else=NA")

#poverty level
alc$povl<-car::Recode(as.numeric(alc$pooryn),recodes = "1='at or above povl'; 2='below povl';else=NA")

#education
alc$edu<-car::Recode(as.numeric(alc$educrec1),recodes = "01:13='lths'; 14='somecollege'; 15='bachelors';16='five+ yrs of college';else=NA")

#ever had lung cancer
alc$lung<-car::Recode(as.numeric(alc$cnlung),recodes = "0:1=0; 2=1;else=NA")

alc$aev<-car::Recode(as.numeric(alc$asthmaev),recodes = "0:1=0; 2=1;else=NA")


#did they die?
alc$d.event<-ifelse(alc$mortstat==1,1,0) 

#time of death post interview
alc$timetodeath<-ifelse(alc$mortstat ==1,
alc$mortdody-alc$year,
2002 - alc$year )

#die 1 yr or 5 yrs 
alc$d1yr<-ifelse(alc$timetodeath<=1&alc$mortstat==1, 1,0) 
alc$d5yr<-ifelse(alc$timetodeath<=5&alc$mortstat==1, 1,0)

#age
alc$age5<-cut(alc$age,seq(15,85, 5))

#coding age at death
alc$d.age<-ifelse(alc$mortstat==1,alc$mortdody-(alc$year-alc$age) ,
ifelse(alc$mortstat==2,2002-(alc$year-alc$age), NA))

#race
alc$race<-Recode(alc$racenew,
recodes ="100='wht'; 200 ='blk'; 300:542='other'; 997:999=NA",
as.factor=T)

alc <- alc %>%
  filter(mortelig == 1, mortdody<9999, is.na(race)==F, is.na(edu)==F)
alc <- alc %>%
  mutate(death_time = ifelse( mortelig ==1, 
                             mortdody - year , 
                             2002:2012 - year ), 
         d.event = ifelse(mortelig == 1 & death_time <= 5, 1, 0))

library(survival)

time_fit <- survfit(Surv(death_time, d.event) ~ 1, 
                  conf.type = "log",
                   data = alc)

library(ggsurvfit)

time_fit %>%
  ggsurvfit()+
  xlim(-1, 6) +
  add_censor_mark() +
  add_confidence_interval()+
  add_quantile(y_value = .975)
Warning: Removed 11 row(s) containing missing values (geom_path).

Person-Period File

p2<-survSplit(Surv(d.age, d.event)~., data=alc,
cut=seq(18, 100, 5), episode="mort_5_yr")
des5<-svydesign(ids=~psu, strata=~strata,
weights = ~mortwtsa, data=p2, nest=T)

Constant Model

constant<-svyglm(d.event~ sex+edu+marital,
design=des5, family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

General Model

general<-svyglm(d.event~factor(mort_5_yr) + sex+edu+marital,
design=des5, family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Linear Model

linear<-svyglm(d.event~mort_5_yr+sex+edu+marital, design=des5,
family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Quadratic Model

quad<-svyglm(d.event~mort_5_yr+ I(mort_5_yr^2)+sex+edu+marital,
design=des5, family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Spline Model

library(splines)
spline<-svyglm(d.event~ns(mort_5_yr, df=3)+sex+edu+marital, design=des5,
family=binomial (link="cloglog"))
Warning in eval(family$initialize): non-integer #successes in a binomial glm!

Hazard Plots

gen_haz<-expand.grid(mort_5_yr = unique(p2$mort_5_yr),
sex= unique(p2$sex),
marital= unique(p2$marital),
edu = unique(p2$edu))

gen_haz<-gen_haz%>%
dplyr::filter(complete.cases(.))

gen_haz$h0<-predict(constant, newdata=gen_haz, type="response")
gen_haz$h1<-predict(general, newdata=gen_haz, type="response")
gen_haz$h2<-predict(linear, newdata=gen_haz, type="response")
gen_haz$h3<-predict(quad, newdata=gen_haz, type="response")
gen_haz$h4<-predict(spline, newdata=gen_haz, type="response")

head(gen_haz)
  mort_5_yr    sex marital  edu         h0          h1           h2
1         1 female married lths 0.02707522 0.000146092 0.0009298503
2         2 female married lths 0.02707522 0.001404533 0.0013494833
3         3 female married lths 0.02707522 0.002207527 0.0019583071
4         4 female married lths 0.02707522 0.003067654 0.0028414128
5         5 female married lths 0.02707522 0.003149102 0.0041219351
6         6 female married lths 0.02707522 0.005618336 0.0059778104
            h3           h4
1 0.0006883198 0.0006755679
2 0.0010668388 0.0009064447
3 0.0016417688 0.0012193457
4 0.0025084710 0.0016487283
5 0.0038050449 0.0022466101
6 0.0057295515 0.0030929739
library(data.table)

Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':

    between, first, last
The following object is masked from 'package:purrr':

    transpose
library(magrittr)

Attaching package: 'magrittr'
The following object is masked from 'package:purrr':

    set_names
The following object is masked from 'package:tidyr':

    extract
fin<-melt(setDT(gen_haz),
id = c("mort_5_yr", "sex", "edu","marital"),
measure.vars = list(haz=c("h0", "h1","h2","h3", "h4")))
head(fin, n=20)
    mort_5_yr    sex  edu marital variable      value
 1:         1 female lths married       h0 0.02707522
 2:         2 female lths married       h0 0.02707522
 3:         3 female lths married       h0 0.02707522
 4:         4 female lths married       h0 0.02707522
 5:         5 female lths married       h0 0.02707522
 6:         6 female lths married       h0 0.02707522
 7:         7 female lths married       h0 0.02707522
 8:         8 female lths married       h0 0.02707522
 9:         9 female lths married       h0 0.02707522
10:        10 female lths married       h0 0.02707522
11:        11 female lths married       h0 0.02707522
12:        12 female lths married       h0 0.02707522
13:        13 female lths married       h0 0.02707522
14:        14 female lths married       h0 0.02707522
15:        15 female lths married       h0 0.02707522
16:        16 female lths married       h0 0.02707522
17:        17 female lths married       h0 0.02707522
18:        18 female lths married       h0 0.02707522
19:         1   male lths married       h0 0.03170243
20:         2   male lths married       h0 0.03170243
library(ggplot2)
fin%>%
dplyr::filter(edu == "bachelors")%>%
dplyr::mutate(mod = dplyr::case_when(.$variable =="h0" ~ "Constant",
.$variable =="h1" ~ "General",
.$variable =="h2" ~ "Linear",
.$variable =="h3" ~ "Polynomial - 2",
.$variable =="h4" ~ "Spline"))%>%
ggplot(aes(x = mort_5_yr*5, y=value ))+
geom_line(aes(group=marital, color=marital) )+
labs(title = "Hazard function for adult mortality",
subtitle = "Alternative model specifications")+
xlab("Age")+ylab("S(t)")+facet_wrap(~mod, scales= "free_y")
Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.

When looking at adult mortality hazard based on education level, specifically a bachelor’s degree and marital status, all Model Fits indicate that individuals who have never been married have a higher risk hazard for mortality when compared to individuals who have been married, divorced, widowed, or separated.

AICc<-AIC(constant)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICg<-AIC(general)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICl<-AIC(linear)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICq<-AIC(quad)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICs<-AIC(spline)
Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS<-data.frame(AIC = c(AICc["AIC"],AICg["AIC"],AICl["AIC"],AICq["AIC"],AICs["AIC"]),
mod = factor(c("const", "general", "linear", "poly 2", "spline")) )
AICS$mod<-forcats::fct_relevel(AICS$mod,
c("general", "const" , "linear", "poly2", "spline"))
Warning: 1 unknown level in `f`: poly2
AICS$deltaAIC<-AICS$AIC - AICS$AIC[AICS$mod=="general"]
knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")],
caption = "Relative AIC for alternative time specifications")
Relative AIC for alternative time specifications
mod AIC deltaAIC
const 282974.3 60924.043
general 222050.3 0.000
linear 226581.4 4531.148
poly 2 226524.5 4474.211
spline 226558.8 4508.487

None of the models are close to the AIC for the general model for this analysis.