library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ipumsr)
library(haven)
library(survival)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(ggplot2)
library(ggpubr)
library(muhaz)
library(carData)
library(dplyr)
library(eha)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
nhis <- read_ipums_ddi("nhis_00009.xml")
nhis_dat <- read_ipums_micro(nhis)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
nhis_dat <- haven::zap_labels(nhis_dat)
nhis_dat <- nhis_dat %>%
filter(MORTELIG == 1 & is.na (nhis_dat$RACENEW)==F)
nhis_dat <- nhis_dat %>%
filter(SEX == 2)
nhis_dat <- nhis_dat%>%
filter(complete.cases(.))
nhis_dat$d.age <- ifelse(nhis_dat$MORTSTAT==1,nhis_dat$MORTDODY-(nhis_dat$YEAR-nhis_dat$AGE),
ifelse(nhis_dat$MORTSTAT==2,2017- (nhis_dat$YEAR-nhis_dat$AGE),NA))
nhis_dat$d.event <- ifelse(nhis_dat$MORTSTAT==1,1,0)
nhis_dat$timetodeath <- ifelse(nhis_dat$MORTSTAT==1,
nhis_dat$MORTDODY- nhis_dat$YEAR,
2017- nhis_dat$YEAR)
nhis_dat$d1yr <- ifelse (nhis_dat$timetodeath <=1 & nhis_dat$MORTSTAT==1,1,0)
nhis_dat$d5yr <- ifelse (nhis_dat$timetodeath <=5 & nhis_dat$MORTSTAT==1,1,0)
nhis_dat$MARST<- as.numeric(nhis_dat$MARST)
nhis_dat$MARST <- car:: Recode(nhis_dat$MARST, recodes= "11:40= 'married/ever married'; 50= 'unmarried'; else= NA", as.factor= T)
nhis_dat$mwt<-nhis_dat$MORTWT/mean(nhis_dat$MORTWT, na.rm=T)
nhis_dat$age5<-cut(nhis_dat$AGE,seq(15,85, 5))
nhis_dat$race<-Recode(nhis_dat$RACENEW,
recodes ="10='wht'; 20 ='blk'; 30:61='other'; 97:99=NA",
as.factor=T)
nhis_dat$EDUC <- car:: Recode(nhis_dat$EDUC, recodes= "100:116= 'LHS'; 201= 'High School'; 400='Bachelor'; 500:503='Graduate'; else= NA", as.factor= T)
nhis_dat$hisp<-Recode(nhis_dat$HISPYN, recodes="1=0; 2=1; else=NA")
nhis_dat$black<-ifelse(nhis_dat$race=='blk',1,0)
nhis_dat$oth<-ifelse(nhis_dat$race=='other',1,0)
nhis_dat$race_eth[nhis_dat$hisp == 0 & nhis_dat$race=="wht"]<-"NHWhite"
## Warning: Unknown or uninitialised column: `race_eth`.
nhis_dat$race_eth[nhis_dat$hisp == 0 & nhis_dat$race=="blk"]<-"NHBlack"
nhis_dat$race_eth[nhis_dat$hisp == 0 & nhis_dat$race=="other"]<-"NHother"
nhis_dat$race_eth[nhis_dat$hisp == 1 ]<-"Hispanic"
nhis_dat$race_eth[is.na(nhis_dat$hisp) ==T | is.na(nhis_dat$race)==T]<-NA
library(forcats)
nhis_dat$race_eth<-fct_relevel(nhis_dat$race_eth, c("NHWhite", "NHBlack" , "Hispanic", "NHother"))
## Warning: 3 unknown levels in `f`: NHWhite, NHBlack, and NHother
#Create person - period file :
subpp<-survSplit(Surv(d.age, d.event)~., data=nhis_dat,
cut=seq(18, 100, 5), episode="mort_5_yr")
#Analysis of death outcome:
des2<-svydesign(ids=~PSU, strata=~STRATA,
weights = ~MORTWTSA, data=subpp, nest=T)
m0<-svyglm(d.event~ MARST+ EDUC,
design=des2, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#general model:
m1<-svyglm(d.event~factor(mort_5_yr) + MARST+ EDUC,
design=des2, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#linear model:
m2<-svyglm(d.event~mort_5_yr+MARST+EDUC, design=des2,
family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#quadratic model:
m3<-svyglm(d.event~mort_5_yr+ I(mort_5_yr^2)+MARST+EDUC,
design=des2, family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
#spline model:
library(splines)
m4<-svyglm(d.event~ns(mort_5_yr, df=3)+MARST+EDUC, design=des2,
family=binomial (link="cloglog"))
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
newdat<-expand.grid(mort_5_yr = unique(subpp$mort_5_yr),
MARST= unique(subpp$MARST),
EDUC = unique(subpp$EDUC))
newdat<-newdat%>%
dplyr::filter(complete.cases(.))
newdat$h0<-predict(m0, newdata=newdat, type="response")
newdat$h1<-predict(m1, newdata=newdat, type="response")
newdat$h2<-predict(m2, newdata=newdat, type="response")
newdat$h3<-predict(m3, newdata=newdat, type="response")
newdat$h4<-predict(m4, newdata=newdat, type="response")
head(newdat)
## mort_5_yr MARST EDUC h0 h1 h2 h3
## 1 1 unmarried LHS 0.01366566 4.252137e-09 0.0001515967 0.0003056354
## 2 2 unmarried LHS 0.01366566 3.170265e-04 0.0002739518 0.0004811518
## 3 3 unmarried LHS 0.01366566 6.231514e-04 0.0004950366 0.0007675995
## 4 4 unmarried LHS 0.01366566 1.481012e-03 0.0008944615 0.0012409231
## 5 5 unmarried LHS 0.01366566 2.211903e-03 0.0016159055 0.0020327502
## 6 6 unmarried LHS 0.01366566 3.312365e-03 0.0029183926 0.0033736637
## h4
## 1 6.502085e-05
## 2 1.058818e-04
## 3 1.718903e-04
## 4 2.773363e-04
## 5 4.433532e-04
## 6 7.000697e-04
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
out<-melt(setDT(newdat),
id = c("mort_5_yr", "MARST", "EDUC"),
measure.vars = list(haz=c("h0", "h1","h2","h3", "h4")))
head(out, n=20)
## mort_5_yr MARST EDUC variable value
## 1: 1 unmarried LHS h0 0.01366566
## 2: 2 unmarried LHS h0 0.01366566
## 3: 3 unmarried LHS h0 0.01366566
## 4: 4 unmarried LHS h0 0.01366566
## 5: 5 unmarried LHS h0 0.01366566
## 6: 6 unmarried LHS h0 0.01366566
## 7: 7 unmarried LHS h0 0.01366566
## 8: 8 unmarried LHS h0 0.01366566
## 9: 9 unmarried LHS h0 0.01366566
## 10: 10 unmarried LHS h0 0.01366566
## 11: 11 unmarried LHS h0 0.01366566
## 12: 12 unmarried LHS h0 0.01366566
## 13: 13 unmarried LHS h0 0.01366566
## 14: 14 unmarried LHS h0 0.01366566
## 15: 15 unmarried LHS h0 0.01366566
## 16: 16 unmarried LHS h0 0.01366566
## 17: 17 unmarried LHS h0 0.01366566
## 18: 18 unmarried LHS h0 0.01366566
## 19: 1 married/ever married LHS h0 0.02628689
## 20: 2 married/ever married LHS h0 0.02628689
library(ggplot2)
out%>%
dplyr::filter(EDUC == "High School")%>%
dplyr::mutate(mod = dplyr::case_when(.$variable =="h0" ~ "Constant",
.$variable =="h1" ~ "General",
.$variable =="h2" ~ "Linear",
.$variable =="h3" ~ "Polynomial - 2",
.$variable =="h4" ~ "Spline"))%>%
ggplot(aes(x = mort_5_yr*5, y=value ))+
geom_line(aes(group=MARST, color=MARST) )+
labs(title = "Hazard function for adult mortality",
subtitle = "Alternative model specifications")+
xlab("Age")+ylab("S(t)")+
facet_wrap(~mod)#+ scale_y_continuous(trans = "log10")
## Don't know how to automatically pick scale for object of type svystat. Defaulting to continuous.
AIC0<-AIC(m0)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC1<-AIC(m1)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC2<-AIC(m2)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC3<-AIC(m3)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AIC4<-AIC(m4)
## Warning in eval(family$initialize): non-integer #successes in a binomial glm!
AICS<-data.frame(AIC = c(AIC0["AIC"],AIC1["AIC"],AIC2["AIC"],AIC3["AIC"],AIC4["AIC"]),
mod = factor(c("const", "general", "linear", "poly 2", "spline")) )
AICS$mod<-forcats::fct_relevel(AICS$mod,
c("general", "const" , "linear", "poly2", "spline"))
## Warning: 1 unknown level in `f`: poly2
AICS$deltaAIC<-AICS$AIC - AICS$AIC[AICS$mod=="general"]
knitr::kable(AICS[, c("mod", "AIC", "deltaAIC")],
caption = "Relative AIC for alternative time specifications")
| mod | AIC | deltaAIC |
|---|---|---|
| const | 511807.7 | 193054.321 |
| general | 318753.4 | 0.000 |
| linear | 320806.6 | 2053.194 |
| poly 2 | 320534.0 | 1780.659 |
| spline | 320322.4 | 1569.085 |
Here,we see that nothing touches the AIC for the general model in this case.