library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ipumsr)
library(haven)
library(survival)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma
library(ggplot2)
library(ggpubr)
library(muhaz)
library(carData)
library(dplyr)
library(eha)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## 
## Attaching package: 'survey'
## 
## The following object is masked from 'package:graphics':
## 
##     dotchart
nhis <- read_ipums_ddi("nhis_00008.xml")
nhis_dat <- read_ipums_micro(nhis)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
nhis_dat <- haven::zap_labels(nhis_dat)

** Recode variable **

nhis_dat$MARST<- as.numeric(nhis_dat$MARST)

nhis_dat$MARST <- car:: Recode(nhis_dat$MARST, recodes= "11:40= 'married/ever married';  50= 'unmarried'; else= NA", as.factor= T)
nhis_dat$EDUC <- car:: Recode(nhis_dat$EDUC, recodes= "100:116= 'LHS';  201= 'High School'; 400='Bachelor'; 500:503='Graduate'; else= NA", as.factor= T)
nhis_dat <- nhis_dat %>%
  filter(MORTELIG == 1)

nhis_dat <- nhis_dat %>%
  filter(SEX == 2)

nhis_dat <- nhis_dat%>%
filter(complete.cases(.))

Include all main effects in the model

nhis_dat<- nhis_dat %>%
  mutate(death_time = ifelse( MORTELIG ==1, 
                             MORTDODY - YEAR , 
                             1990:2017 - YEAR ), 
         d5.event = ifelse(MORTELIG == 1 & death_time <= 5, 1, 0))

library(survival)

time_fit <- survfit(Surv(death_time, d5.event) ~ 1, 
                  conf.type = "log",
                   data = nhis_dat)

library(ggsurvfit)

time_fit %>%
  ggsurvfit()+
  xlim(-1, 6) +
  add_censor_mark() +
  add_confidence_interval()+
  add_quantile(y_value = .975)
## Warning: Removed 37 row(s) containing missing values (geom_path).

#Test for an interaction between at least two of the predictors:

options(survey.lonely.psu = "adjust")

des2<-svydesign(ids = ~PSU,
                strata = ~STRATA,
                weights=~MORTWTSA, 
                data=nhis_dat,
                nest=TRUE)

data<-svycoxph(Surv(death_time, d5.event)~EDUC+MARST,
                  design = des2)




summary(data)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (1932) clusters.
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA, 
##     data = nhis_dat, nest = TRUE)
## Call:
## svycoxph(formula = Surv(death_time, d5.event) ~ EDUC + MARST, 
##     design = des2)
## 
##   n= 230527, number of events= 14690 
## 
##                     coef exp(coef) se(coef) robust se       z Pr(>|z|)    
## EDUCGraduate    -0.06267   0.93925  0.03528   0.07636  -0.821    0.412    
## EDUCHigh School  1.01841   2.76880  0.02272   0.04468  22.792   <2e-16 ***
## EDUCLHS          1.53500   4.64132  0.02279   0.04370  35.125   <2e-16 ***
## MARSTunmarried  -1.23681   0.29031  0.02569   0.05954 -20.772   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## EDUCGraduate       0.9393     1.0647    0.8087    1.0909
## EDUCHigh School    2.7688     0.3612    2.5366    3.0222
## EDUCLHS            4.6413     0.2155    4.2603    5.0564
## MARSTunmarried     0.2903     3.4446    0.2583    0.3262
## 
## Concordance= 0.687  (se = 0.003 )
## Likelihood ratio test= NA  on 4 df,   p=NA
## Wald test            = 2646  on 4 df,   p=<2e-16
## Score (logrank) test = NA  on 4 df,   p=NA
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
fit1<-svycoxph(Surv(time = death_time, d5.event)~EDUC+MARST, design=des2)
summary(fit1) 
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (1932) clusters.
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA, 
##     data = nhis_dat, nest = TRUE)
## Call:
## svycoxph(formula = Surv(time = death_time, d5.event) ~ EDUC + 
##     MARST, design = des2)
## 
##   n= 230527, number of events= 14690 
## 
##                     coef exp(coef) se(coef) robust se       z Pr(>|z|)    
## EDUCGraduate    -0.06267   0.93925  0.03528   0.07636  -0.821    0.412    
## EDUCHigh School  1.01841   2.76880  0.02272   0.04468  22.792   <2e-16 ***
## EDUCLHS          1.53500   4.64132  0.02279   0.04370  35.125   <2e-16 ***
## MARSTunmarried  -1.23681   0.29031  0.02569   0.05954 -20.772   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## EDUCGraduate       0.9393     1.0647    0.8087    1.0909
## EDUCHigh School    2.7688     0.3612    2.5366    3.0222
## EDUCLHS            4.6413     0.2155    4.2603    5.0564
## MARSTunmarried     0.2903     3.4446    0.2583    0.3262
## 
## Concordance= 0.687  (se = 0.003 )
## Likelihood ratio test= NA  on 4 df,   p=NA
## Wald test            = 2646  on 4 df,   p=<2e-16
## Score (logrank) test = NA  on 4 df,   p=NA
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
fit.test<-cox.zph(fit1)
fit.test
##           chisq df p
## EDUC   2.39e-05  3 1
## MARST  1.59e-05  1 1
## GLOBAL 3.95e-05  4 1
plot(fit.test, df=2)

Produce plots of the survival function and the cumulative hazard function for different “risk profiles”, as done in the notes

plot(survfit(data),xlim=c(6,9),
     main="survivorship cox regression ")

Evaluate the assumptions of the Cox model, including proportionality of hazards of covariates, and if you use a continuous variable, evaluate linearity of the effect using Martingale residuals

cox.s<-svycoxph(Surv(death_time,d5.event)~EDUC+MARST,
                design=des2)
summary(cox.s)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (1932) clusters.
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA, 
##     data = nhis_dat, nest = TRUE)
## Call:
## svycoxph(formula = Surv(death_time, d5.event) ~ EDUC + MARST, 
##     design = des2)
## 
##   n= 230527, number of events= 14690 
## 
##                     coef exp(coef) se(coef) robust se       z Pr(>|z|)    
## EDUCGraduate    -0.06267   0.93925  0.03528   0.07636  -0.821    0.412    
## EDUCHigh School  1.01841   2.76880  0.02272   0.04468  22.792   <2e-16 ***
## EDUCLHS          1.53500   4.64132  0.02279   0.04370  35.125   <2e-16 ***
## MARSTunmarried  -1.23681   0.29031  0.02569   0.05954 -20.772   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                 exp(coef) exp(-coef) lower .95 upper .95
## EDUCGraduate       0.9393     1.0647    0.8087    1.0909
## EDUCHigh School    2.7688     0.3612    2.5366    3.0222
## EDUCLHS            4.6413     0.2155    4.2603    5.0564
## MARSTunmarried     0.2903     3.4446    0.2583    0.3262
## 
## Concordance= 0.687  (se = 0.003 )
## Likelihood ratio test= NA  on 4 df,   p=NA
## Wald test            = 2646  on 4 df,   p=<2e-16
## Score (logrank) test = NA  on 4 df,   p=NA
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
fit.test<-cox.zph(cox.s)
fit.test
##           chisq df p
## EDUC   2.39e-05  3 1
## MARST  1.59e-05  1 1
## GLOBAL 3.95e-05  4 1
plot(fit.test, df=2)

##Aalen’s additive regression model:

fita<-aareg(Surv(death_time,d5.event)~EDUC+MARST+(STRATA),
            nhis_dat, weights = MORTWTSA)

summary(fita)
##                     slope      coef se(coef)      z         p
## Intercept        1.20e-02  2.72e-06 1.97e-07  13.80  2.04e-43
## EDUCGraduate    -4.32e-04 -1.07e-07 5.49e-08  -1.96  5.04e-02
## EDUCHigh School  6.62e-03  1.62e-06 5.20e-08  31.20 5.42e-214
## EDUCLHS          1.41e-02  3.34e-06 7.40e-08  45.20  0.00e+00
## MARSTunmarried  -7.81e-03 -1.95e-06 4.48e-08 -43.50  0.00e+00
## STRATA          -1.11e-06 -2.29e-10 3.30e-11  -6.95  3.75e-12
## 
## Chisq=3803.75 on 5 df, p=<2e-16; test weights=aalen
library(ggfortify)
autoplot(fita)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.