Packages

library(dplyr)
library(tidyverse)
library(ggplot2)
library(infer)
library(readr)
library(skimr)
library(moderndive)
library(ISLR)
library(stats)
library(nleqslv)
library(ggplot2)
library(hexbin)
library(reshape2)
library(lattice)
library(gridExtra)
library(xtable)
library(splines)
library(survival)
library(grid)
library(lpSolve)
library(mice)
library(purrr)
library(survival)
library(knitr)
#skud ud til Dr.

Exercise 1.c)

Consider the time to death given by time and the event status defined from status!=0.

Estimate E(T∗ ∧ τ), with τ = 7, for people with (vf=1) or without (vf=0) based on Kaplan-Meier estimates for the two groups.

library(timereg)

data(TRACE)

nrow(TRACE)
## [1] 1878
view(TRACE)

summary(TRACE)
##        id            wmi            status           chf        
##  Min.   :   3   Min.   :0.300   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:1631   1st Qu.:1.100   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :3332   Median :1.400   Median :9.000   Median :1.0000  
##  Mean   :3346   Mean   :1.398   Mean   :4.637   Mean   :0.5229  
##  3rd Qu.:5142   3rd Qu.:1.800   3rd Qu.:9.000   3rd Qu.:1.0000  
##  Max.   :6672   Max.   :3.000   Max.   :9.000   Max.   :1.0000  
##       age             sex            diabetes           time         
##  Min.   :22.94   Min.   :0.0000   Min.   :0.0000   Min.   :0.000687  
##  1st Qu.:59.61   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:1.560750  
##  Median :68.23   Median :1.0000   Median :0.0000   Median :6.088000  
##  Mean   :67.00   Mean   :0.6954   Mean   :0.1001   Mean   :4.552470  
##  3rd Qu.:75.39   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:6.987000  
##  Max.   :96.33   Max.   :1.0000   Max.   :1.0000   Max.   :8.482000  
##        vf         
##  Min.   :0.00000  
##  1st Qu.:0.00000  
##  Median :0.00000  
##  Mean   :0.07242  
##  3rd Qu.:0.00000  
##  Max.   :1.00000
traceSurv <- survfit(Surv(time, status != 0) ~ vf, data=TRACE)

plot(traceSurv, mark.time=FALSE, conf.int=FALSE, col = c("darkseagreen", "darkseagreen2"), xlab = "Survival time, T=min(T*,tau)", ylab = "Survival probability")

rmean <- summary(traceSurv, rmean = 7)$table[,5:6]

rmean
##         rmean  se(rmean)
## vf=0 4.683315 0.06618355
## vf=1 2.975142 0.27134037

How do we interpret the numbers and their difference E(T∗ ∧ τ|vf = 1) − E(T∗ ∧ τ|vf = 0)? The restricted mean (with standard error) can be obtained from print.survit.

The number is the expected survival time given that the survival time can’t be larger than 7.

The CI’s for each of the variables describes the expected survival time. The difference describe the expected difference in survival times.

vf_0_conf <- rmean[1, 1] + c(-1, 0, 1) * 1.96 * rmean[1, 2]

vf_1_conf <- rmean[2, 1] + c(-1, 0, 1) * 1.96 * rmean[2, 2]

difference_conf <- rmean[2, 1] - rmean[1, 1] + c(-1, 0, 1) * 1.96 * sqrt(rmean[1, 2]^2 + rmean[2, 2]^2)


CIs <- data.frame(c("Estimate", "Upper bound", "Lower bound"),
                  vf_0_conf, vf_1_conf, difference_conf)
colnames(CIs) <- c("", "vf=0", "vf=1", "difference")
kable(CIs )
vf=0 vf=1 difference
Estimate 4.553595 2.443315 -2.255592
Upper bound 4.683315 2.975142 -1.708173
Lower bound 4.813035 3.506969 -1.160754
CIs
##                   vf=0     vf=1 difference
## 1    Estimate 4.553595 2.443315  -2.255592
## 2 Upper bound 4.683315 2.975142  -1.708173
## 3 Lower bound 4.813035 3.506969  -1.160754

Exercise 1.d)

Fit a model with constant hazard for each group (vf=0, vf=1) in the TRACE data and estimate the restricted mean life (up to τ = 7 years) for the two groups.

We fit a model with constant hazard by using the exponential distribution.

vf0_TRACE <- TRACE %>%
  filter(vf == 0)

vf1_TRACE <- TRACE %>%
  filter(vf == 1)

model_vf0 <- survreg(Surv(time, status != 0) ~ 1, data = vf0_TRACE, dist = "exponential")

model_vf1 <- survreg(Surv(time, status != 0) ~ 1, data = vf1_TRACE, dist = "exponential")

Compute standard errors based on the delta-theorem. That is, theoretically derive the estimator and the expression for the standard error, and use the estimators to estimate the quantities from the data).

We estimate the restricted mean life (up to τ = 7 years) for the two groups by the calculations derived earlier

tau <- 7

alpha_vf0 <- exp(-coef(model_vf0))

alpha_vf1 <- exp(-coef(model_vf1))


mu_vf0 <- (1-exp(- alpha_vf0 * tau))/alpha_vf0

mu_vf1 <- (1-exp(- alpha_vf1 * tau))/alpha_vf1

muhat <- t(c(mu_vf0, mu_vf1))
colnames(muhat) <- c( "vf=0", "vf=1")
  
kable(muhat )
vf=0 vf=1
4.914715 3.502502

The standard errors are thus computed by

N_vf0 <- sum(vf0_TRACE$status != 0)
N_vf1 <- sum(vf1_TRACE$status != 0)


se_vf0 <- sqrt(((alpha_vf0 * tau + 1) * exp(-alpha_vf0 * tau) - 1)^2 / (alpha_vf0^2*N_vf0))
se_vf1 <- sqrt(((alpha_vf1 * tau + 1) * exp(-alpha_vf1 * tau) - 1)^2 / (alpha_vf1^2*N_vf1))


sehat <- t(c(se_vf0,se_vf1))
colnames(sehat) <- c( "vf=0", "vf=1")
  
kable(sehat )
vf=0 vf=1
0.0547822 0.2165898

Confidence intervals for the estimates in the constant hazard model

vf0_conf <- mu_vf0 + c(-1, 0, 1) * 1.96 * se_vf0

vf1_conf <- mu_vf1 + c(-1, 0, 1) * 1.96 * se_vf1

new_difference_conf <- mu_vf1 - mu_vf0 + c(-1, 0, 1) * 1.96 * sqrt(se_vf0^2 + se_vf1^2)


new_CIs <- data.frame(c("Estimate", "Upper bound", "Lower bound"),
                  vf0_conf, vf1_conf, new_difference_conf)
colnames(new_CIs) <- c("", "vf=0", "vf=1", "difference")
kable(new_CIs )
vf=0 vf=1 difference
Estimate 4.807342 3.077986 -1.850098
Upper bound 4.914715 3.502502 -1.412214
Lower bound 5.022088 3.927017 -0.974329

Compare your estimates to those based on the Kaplan-Meier estimate (question 3).

comparison_table <- data.frame(
  Group = rep(c("Kaplan-Meier: vf=0","Constant hazard: vf=0", "Kaplan-Meier: vf=1", "Constant hazard: vf=1", 
                "Kaplan-Meier: Difference", "Constant hazard: Difference")),
  Lower = c(vf_0_conf[1], vf0_conf[1], vf_1_conf[1], vf1_conf[1], difference_conf[1], new_difference_conf[1] ),
  Estimate = c(rmean[1,1], mu_vf0, rmean[2,1], mu_vf1, rmean[2,1]-rmean[1,1], mu_vf1 - mu_vf0),
  Upper = c(vf_0_conf[3], vf0_conf[3], vf_1_conf[3], vf1_conf[3], difference_conf[3], new_difference_conf[3] )
)

comparison_table
##                         Group     Lower  Estimate     Upper
## 1          Kaplan-Meier: vf=0  4.553595  4.683315  4.813035
## 2       Constant hazard: vf=0  4.807342  4.914715  5.022088
## 3          Kaplan-Meier: vf=1  2.443315  2.975142  3.506969
## 4       Constant hazard: vf=1  3.077985  3.502501  3.927017
## 5    Kaplan-Meier: Difference -2.255592 -1.708173 -1.160754
## 6 Constant hazard: Difference -1.850098 -1.412214 -0.974329