library(dplyr)
library(tidyverse)
library(ggplot2)
library(infer)
library(readr)
library(skimr)
library(moderndive)
library(ISLR)
library(stats)
library(nleqslv)
library(ggplot2)
library(hexbin)
library(reshape2)
library(lattice)
library(gridExtra)
library(xtable)
library(splines)
library(survival)
library(grid)
library(lpSolve)
library(mice)
library(purrr)
library(survival)
library(knitr)
#skud ud til Dr.
Consider the time to death given by time and the event status defined from status!=0.
Estimate E(T∗ ∧ τ), with τ = 7, for people with (vf=1) or without (vf=0) based on Kaplan-Meier estimates for the two groups.
library(timereg)
data(TRACE)
nrow(TRACE)
## [1] 1878
view(TRACE)
summary(TRACE)
## id wmi status chf
## Min. : 3 Min. :0.300 Min. :0.000 Min. :0.0000
## 1st Qu.:1631 1st Qu.:1.100 1st Qu.:0.000 1st Qu.:0.0000
## Median :3332 Median :1.400 Median :9.000 Median :1.0000
## Mean :3346 Mean :1.398 Mean :4.637 Mean :0.5229
## 3rd Qu.:5142 3rd Qu.:1.800 3rd Qu.:9.000 3rd Qu.:1.0000
## Max. :6672 Max. :3.000 Max. :9.000 Max. :1.0000
## age sex diabetes time
## Min. :22.94 Min. :0.0000 Min. :0.0000 Min. :0.000687
## 1st Qu.:59.61 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.560750
## Median :68.23 Median :1.0000 Median :0.0000 Median :6.088000
## Mean :67.00 Mean :0.6954 Mean :0.1001 Mean :4.552470
## 3rd Qu.:75.39 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:6.987000
## Max. :96.33 Max. :1.0000 Max. :1.0000 Max. :8.482000
## vf
## Min. :0.00000
## 1st Qu.:0.00000
## Median :0.00000
## Mean :0.07242
## 3rd Qu.:0.00000
## Max. :1.00000
traceSurv <- survfit(Surv(time, status != 0) ~ vf, data=TRACE)
plot(traceSurv, mark.time=FALSE, conf.int=FALSE, col = c("darkseagreen", "darkseagreen2"), xlab = "Survival time, T=min(T*,tau)", ylab = "Survival probability")
rmean <- summary(traceSurv, rmean = 7)$table[,5:6]
rmean
## rmean se(rmean)
## vf=0 4.683315 0.06618355
## vf=1 2.975142 0.27134037
How do we interpret the numbers and their difference E(T∗ ∧ τ|vf = 1) − E(T∗ ∧ τ|vf = 0)? The restricted mean (with standard error) can be obtained from print.survit.
The number is the expected survival time given that the survival time can’t be larger than 7.
The CI’s for each of the variables describes the expected survival time. The difference describe the expected difference in survival times.
vf_0_conf <- rmean[1, 1] + c(-1, 0, 1) * 1.96 * rmean[1, 2]
vf_1_conf <- rmean[2, 1] + c(-1, 0, 1) * 1.96 * rmean[2, 2]
difference_conf <- rmean[2, 1] - rmean[1, 1] + c(-1, 0, 1) * 1.96 * sqrt(rmean[1, 2]^2 + rmean[2, 2]^2)
CIs <- data.frame(c("Estimate", "Upper bound", "Lower bound"),
vf_0_conf, vf_1_conf, difference_conf)
colnames(CIs) <- c("", "vf=0", "vf=1", "difference")
kable(CIs )
| vf=0 | vf=1 | difference | |
|---|---|---|---|
| Estimate | 4.553595 | 2.443315 | -2.255592 |
| Upper bound | 4.683315 | 2.975142 | -1.708173 |
| Lower bound | 4.813035 | 3.506969 | -1.160754 |
CIs
## vf=0 vf=1 difference
## 1 Estimate 4.553595 2.443315 -2.255592
## 2 Upper bound 4.683315 2.975142 -1.708173
## 3 Lower bound 4.813035 3.506969 -1.160754
Fit a model with constant hazard for each group (vf=0, vf=1) in the TRACE data and estimate the restricted mean life (up to τ = 7 years) for the two groups.
We fit a model with constant hazard by using the exponential distribution.
vf0_TRACE <- TRACE %>%
filter(vf == 0)
vf1_TRACE <- TRACE %>%
filter(vf == 1)
model_vf0 <- survreg(Surv(time, status != 0) ~ 1, data = vf0_TRACE, dist = "exponential")
model_vf1 <- survreg(Surv(time, status != 0) ~ 1, data = vf1_TRACE, dist = "exponential")
Compute standard errors based on the delta-theorem. That is, theoretically derive the estimator and the expression for the standard error, and use the estimators to estimate the quantities from the data).
We estimate the restricted mean life (up to τ = 7 years) for the two groups by the calculations derived earlier
tau <- 7
alpha_vf0 <- exp(-coef(model_vf0))
alpha_vf1 <- exp(-coef(model_vf1))
mu_vf0 <- (1-exp(- alpha_vf0 * tau))/alpha_vf0
mu_vf1 <- (1-exp(- alpha_vf1 * tau))/alpha_vf1
muhat <- t(c(mu_vf0, mu_vf1))
colnames(muhat) <- c( "vf=0", "vf=1")
kable(muhat )
| vf=0 | vf=1 |
|---|---|
| 4.914715 | 3.502502 |
The standard errors are thus computed by
N_vf0 <- sum(vf0_TRACE$status != 0)
N_vf1 <- sum(vf1_TRACE$status != 0)
se_vf0 <- sqrt(((alpha_vf0 * tau + 1) * exp(-alpha_vf0 * tau) - 1)^2 / (alpha_vf0^2*N_vf0))
se_vf1 <- sqrt(((alpha_vf1 * tau + 1) * exp(-alpha_vf1 * tau) - 1)^2 / (alpha_vf1^2*N_vf1))
sehat <- t(c(se_vf0,se_vf1))
colnames(sehat) <- c( "vf=0", "vf=1")
kable(sehat )
| vf=0 | vf=1 |
|---|---|
| 0.0547822 | 0.2165898 |
Confidence intervals for the estimates in the constant hazard model
vf0_conf <- mu_vf0 + c(-1, 0, 1) * 1.96 * se_vf0
vf1_conf <- mu_vf1 + c(-1, 0, 1) * 1.96 * se_vf1
new_difference_conf <- mu_vf1 - mu_vf0 + c(-1, 0, 1) * 1.96 * sqrt(se_vf0^2 + se_vf1^2)
new_CIs <- data.frame(c("Estimate", "Upper bound", "Lower bound"),
vf0_conf, vf1_conf, new_difference_conf)
colnames(new_CIs) <- c("", "vf=0", "vf=1", "difference")
kable(new_CIs )
| vf=0 | vf=1 | difference | |
|---|---|---|---|
| Estimate | 4.807342 | 3.077986 | -1.850098 |
| Upper bound | 4.914715 | 3.502502 | -1.412214 |
| Lower bound | 5.022088 | 3.927017 | -0.974329 |
Compare your estimates to those based on the Kaplan-Meier estimate (question 3).
comparison_table <- data.frame(
Group = rep(c("Kaplan-Meier: vf=0","Constant hazard: vf=0", "Kaplan-Meier: vf=1", "Constant hazard: vf=1",
"Kaplan-Meier: Difference", "Constant hazard: Difference")),
Lower = c(vf_0_conf[1], vf0_conf[1], vf_1_conf[1], vf1_conf[1], difference_conf[1], new_difference_conf[1] ),
Estimate = c(rmean[1,1], mu_vf0, rmean[2,1], mu_vf1, rmean[2,1]-rmean[1,1], mu_vf1 - mu_vf0),
Upper = c(vf_0_conf[3], vf0_conf[3], vf_1_conf[3], vf1_conf[3], difference_conf[3], new_difference_conf[3] )
)
comparison_table
## Group Lower Estimate Upper
## 1 Kaplan-Meier: vf=0 4.553595 4.683315 4.813035
## 2 Constant hazard: vf=0 4.807342 4.914715 5.022088
## 3 Kaplan-Meier: vf=1 2.443315 2.975142 3.506969
## 4 Constant hazard: vf=1 3.077985 3.502501 3.927017
## 5 Kaplan-Meier: Difference -2.255592 -1.708173 -1.160754
## 6 Constant hazard: Difference -1.850098 -1.412214 -0.974329