library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ipumsr)
library(haven)
library(survival)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
library(ggplot2)
library(ggpubr)
library(muhaz)
library(carData)
library(dplyr)
library(eha)
library(survey)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
## Attaching package: 'survey'
##
## The following object is masked from 'package:graphics':
##
## dotchart
nhis <- read_ipums_ddi("nhis_00008.xml")
nhis_dat <- read_ipums_micro(nhis)
## Use of data from IPUMS NHIS is subject to conditions including that users
## should cite the data appropriately. Use command `ipums_conditions()` for more
## details.
nhis_dat <- haven::zap_labels(nhis_dat)
** Recode variable **
nhis_dat$MARST<- as.numeric(nhis_dat$MARST)
nhis_dat$MARST <- car:: Recode(nhis_dat$MARST, recodes= "11:40= 'married/ever married'; 50= 'unmarried'; else= NA", as.factor= T)
nhis_dat$EDUC <- car:: Recode(nhis_dat$EDUC, recodes= "100:116= 'LHS'; 201= 'High School'; 400='Bachelor'; 500:503='Graduate'; else= NA", as.factor= T)
nhis_dat <- nhis_dat %>%
filter(MORTELIG == 1)
nhis_dat <- nhis_dat %>%
filter(SEX == 2)
nhis_dat <- nhis_dat%>%
filter(complete.cases(.))
Include all main effects in the model
nhis_dat<- nhis_dat %>%
mutate(death_time = ifelse( MORTELIG ==1,
MORTDODY - YEAR ,
1990:2017 - YEAR ),
d5.event = ifelse(MORTELIG == 1 & death_time <= 5, 1, 0))
library(survival)
time_fit <- survfit(Surv(death_time, d5.event) ~ 1,
conf.type = "log",
data = nhis_dat)
library(ggsurvfit)
time_fit %>%
ggsurvfit()+
xlim(-1, 6) +
add_censor_mark() +
add_confidence_interval()+
add_quantile(y_value = .975)
## Warning: Removed 37 row(s) containing missing values (geom_path).
#Test for an interaction between at least two of the predictors:
options(survey.lonely.psu = "adjust")
des2<-svydesign(ids = ~PSU,
strata = ~STRATA,
weights=~MORTWTSA,
data=nhis_dat,
nest=TRUE)
data<-svycoxph(Surv(death_time, d5.event)~EDUC+MARST,
design = des2)
summary(data)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (1932) clusters.
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA,
## data = nhis_dat, nest = TRUE)
## Call:
## svycoxph(formula = Surv(death_time, d5.event) ~ EDUC + MARST,
## design = des2)
##
## n= 230527, number of events= 14690
##
## coef exp(coef) se(coef) robust se z Pr(>|z|)
## EDUCGraduate -0.06267 0.93925 0.03528 0.07636 -0.821 0.412
## EDUCHigh School 1.01841 2.76880 0.02272 0.04468 22.792 <2e-16 ***
## EDUCLHS 1.53500 4.64132 0.02279 0.04370 35.125 <2e-16 ***
## MARSTunmarried -1.23681 0.29031 0.02569 0.05954 -20.772 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## EDUCGraduate 0.9393 1.0647 0.8087 1.0909
## EDUCHigh School 2.7688 0.3612 2.5366 3.0222
## EDUCLHS 4.6413 0.2155 4.2603 5.0564
## MARSTunmarried 0.2903 3.4446 0.2583 0.3262
##
## Concordance= 0.687 (se = 0.003 )
## Likelihood ratio test= NA on 4 df, p=NA
## Wald test = 2646 on 4 df, p=<2e-16
## Score (logrank) test = NA on 4 df, p=NA
##
## (Note: the likelihood ratio and score tests assume independence of
## observations within a cluster, the Wald and robust score tests do not).
fit1<-svycoxph(Surv(time = death_time, d5.event)~EDUC+MARST, design=des2)
summary(fit1)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (1932) clusters.
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA,
## data = nhis_dat, nest = TRUE)
## Call:
## svycoxph(formula = Surv(time = death_time, d5.event) ~ EDUC +
## MARST, design = des2)
##
## n= 230527, number of events= 14690
##
## coef exp(coef) se(coef) robust se z Pr(>|z|)
## EDUCGraduate -0.06267 0.93925 0.03528 0.07636 -0.821 0.412
## EDUCHigh School 1.01841 2.76880 0.02272 0.04468 22.792 <2e-16 ***
## EDUCLHS 1.53500 4.64132 0.02279 0.04370 35.125 <2e-16 ***
## MARSTunmarried -1.23681 0.29031 0.02569 0.05954 -20.772 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## EDUCGraduate 0.9393 1.0647 0.8087 1.0909
## EDUCHigh School 2.7688 0.3612 2.5366 3.0222
## EDUCLHS 4.6413 0.2155 4.2603 5.0564
## MARSTunmarried 0.2903 3.4446 0.2583 0.3262
##
## Concordance= 0.687 (se = 0.003 )
## Likelihood ratio test= NA on 4 df, p=NA
## Wald test = 2646 on 4 df, p=<2e-16
## Score (logrank) test = NA on 4 df, p=NA
##
## (Note: the likelihood ratio and score tests assume independence of
## observations within a cluster, the Wald and robust score tests do not).
fit.test<-cox.zph(fit1)
fit.test
## chisq df p
## EDUC 2.39e-05 3 1
## MARST 1.59e-05 1 1
## GLOBAL 3.95e-05 4 1
plot(fit.test, df=2)
Produce plots of the survival function and the cumulative hazard function for different “risk profiles”, as done in the notes
plot(survfit(data),xlim=c(6,9),
main="survivorship cox regression ")
Evaluate the assumptions of the Cox model, including proportionality of hazards of covariates, and if you use a continuous variable, evaluate linearity of the effect using Martingale residuals
cox.s<-svycoxph(Surv(death_time,d5.event)~EDUC+MARST,
design=des2)
summary(cox.s)
## Stratified 1 - level Cluster Sampling design (with replacement)
## With (1932) clusters.
## svydesign(ids = ~PSU, strata = ~STRATA, weights = ~MORTWTSA,
## data = nhis_dat, nest = TRUE)
## Call:
## svycoxph(formula = Surv(death_time, d5.event) ~ EDUC + MARST,
## design = des2)
##
## n= 230527, number of events= 14690
##
## coef exp(coef) se(coef) robust se z Pr(>|z|)
## EDUCGraduate -0.06267 0.93925 0.03528 0.07636 -0.821 0.412
## EDUCHigh School 1.01841 2.76880 0.02272 0.04468 22.792 <2e-16 ***
## EDUCLHS 1.53500 4.64132 0.02279 0.04370 35.125 <2e-16 ***
## MARSTunmarried -1.23681 0.29031 0.02569 0.05954 -20.772 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## EDUCGraduate 0.9393 1.0647 0.8087 1.0909
## EDUCHigh School 2.7688 0.3612 2.5366 3.0222
## EDUCLHS 4.6413 0.2155 4.2603 5.0564
## MARSTunmarried 0.2903 3.4446 0.2583 0.3262
##
## Concordance= 0.687 (se = 0.003 )
## Likelihood ratio test= NA on 4 df, p=NA
## Wald test = 2646 on 4 df, p=<2e-16
## Score (logrank) test = NA on 4 df, p=NA
##
## (Note: the likelihood ratio and score tests assume independence of
## observations within a cluster, the Wald and robust score tests do not).
fit.test<-cox.zph(cox.s)
fit.test
## chisq df p
## EDUC 2.39e-05 3 1
## MARST 1.59e-05 1 1
## GLOBAL 3.95e-05 4 1
plot(fit.test, df=2)
##Aalen’s additive regression model:
fita<-aareg(Surv(death_time,d5.event)~EDUC+MARST+(STRATA),
nhis_dat, weights = MORTWTSA)
summary(fita)
## slope coef se(coef) z p
## Intercept 1.20e-02 2.72e-06 1.97e-07 13.80 2.04e-43
## EDUCGraduate -4.32e-04 -1.07e-07 5.49e-08 -1.96 5.04e-02
## EDUCHigh School 6.62e-03 1.62e-06 5.20e-08 31.20 5.42e-214
## EDUCLHS 1.41e-02 3.34e-06 7.40e-08 45.20 0.00e+00
## MARSTunmarried -7.81e-03 -1.95e-06 4.48e-08 -43.50 0.00e+00
## STRATA -1.11e-06 -2.29e-10 3.30e-11 -6.95 3.75e-12
##
## Chisq=3803.75 on 5 df, p=<2e-16; test weights=aalen
library(ggfortify)
autoplot(fita)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.