Packages

library(tidyverse)
library(skimr)
library(survival)
library(timereg)
library(cdnbcr)

Reading the dataset and cleaning

data("melanoma", package = "timereg")

# Transforming the data
melanoma <- melanoma %>% mutate(days = days/365.25,
                                thick = thick / 100)

# Delta
melanoma$delta <- as.numeric(melanoma$status == 1)

# Selecting variables
melanoma <- melanoma %>% dplyr::select(ulc, thick, days, delta)

glimpse(melanoma)
## Rows: 205
## Columns: 4
## $ ulc   <int> 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ thick <dbl> 6.76, 0.65, 1.34, 2.90, 12.08, 4.84, 5.16, 12.88, 3.22, 7.41, 4.~
## $ days  <dbl> 0.02737851, 0.08213552, 0.09582478, 0.27104723, 0.50650240, 0.55~
## $ delta <dbl> 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1~

Descriptive analysis

# Summary
skim(melanoma)
Data summary
Name melanoma
Number of rows 205
Number of columns 4
_______________________
Column type frequency:
numeric 4
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ulc 0 1 0.44 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
thick 0 1 2.92 2.96 0.10 0.97 1.94 3.56 17.42 ▇▂▁▁▁
days 0 1 5.89 3.07 0.03 4.18 5.49 8.33 15.24 ▃▇▅▂▁
delta 0 1 0.28 0.45 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▃
# Plots
hist(melanoma$days, main = "", xlab = "Time (years)", ylab = "Density", prob = TRUE)

boxplot(days ~ ulc, melanoma, xlab = "Ulceration status", ylab = "Time (years)",
        names = c("Absent", "Present"))

plot(days ~ thick, melanoma, pch = 16)

# Kaplan-Meier by ulceration
mKM <- survfit(Surv(days, delta) ~ ulc, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time (years)", ylab = "Surviving function", col = c(2, 4))
legend("bottomleft", c("ulc: absent", "ulc: present"), col = c(2, 4), lty = 1)

CDNBCR fit

Consider the following regression structure \[ \begin{align*} \log(\theta_i) = \beta_{11}I(\texttt{ulc}_i == ``\textrm{present}") \quad \textrm{and} \quad \log\left(\dfrac{p_i}{1 - p_i}\right) = \beta_{21} \texttt{thickness}_i\\ \end{align*} \]

Uncorrelated destructive fit (alpha = 0)

fit0 <- cdnbcr(Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma, alpha = FALSE)

summary(fit0)
## Call:
## cdnbcr(formula = Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma, 
##     alpha = FALSE)
## 
## Summary for residuals:
##      Mean Std. dev. Skewness Kurtosis
##   0.00228   0.99203  0.00898  2.80513
## 
## ----------------------------------------------------------------
## Mean competing causes:
## ----------------------------------------------------------------
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -0.37059    0.80002 -0.4632 0.643206   
## ulc          2.68363    0.98916  2.7130 0.006667 **
## 
## ----------------------------------------------------------------
## Activation probability:
## ----------------------------------------------------------------
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)
## thick  0.39352    0.33578   1.172   0.2412
## 
## ----------------------------------------------------------------
## Mean lifetime:
## ----------------------------------------------------------------
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.9540     0.3048  6.4106 1.45e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## phi: 4.361524 Std. Error: 2.845666
## sigma: 2.369241 Std. Error: 0.5514378
## 
## In addition, Log-lik value: -206.5607 
## AIC: 425.1213 and BIC: 445.0594 
## EM iterations: 1445

Some diagnostic plots:

## Fitted median cure rate by ulc
theta_h <- fit0$fitted$competing
phi_h <- fit0$nuisance$phi
p_h <- fit0$fitted$activation
alpha_h <- 0L

p0 <- cure_rate(theta_h, phi_h, p_h, alpha_h)

p0_absent <- median(p0[which(melanoma$ulc == 0)])
p0_present <- median(p0[which(melanoma$ulc == 1)])

# Graphically
mKM <- survfit(Surv(days, delta) ~ ulc, melanoma, se.fit = FALSE)
plot(mKM, col = c(2, 4))
abline(h = c(p0_absent, p0_present), col = c(2, 4), lty = c(2, 2))
legend("bottomleft", c("ulc: absent", "ulc: present"), col = c(2, 4), lty = 1)

# Quantile residuals
nrep <- 1000
mqresid <- NULL

# Populational survival function
Spop <- pcdnbcr(melanoma$days,
                fit0$fitted$competing,
                fit0$nuisance$phi,
                fit0$fitted$activation,
                0L,
                fit0$fitted$mean,
                fit0$nuisance$sigma, lower.tail = FALSE)

u <- melanoma$delta * (1 - Spop) + (1 - melanoma$delta) * runif(length(melanoma$days), 1 - Spop)
for (i in 1:1000) {
  qresid <- sort(qnorm(runif(u)))
  mqresid <- cbind(mqresid, qresid)
}

qresid <- apply(mqresid, 1, median)
qqnorm(qresid, pch = "+")
abline(0, 1, col = "grey", lwd = 2, lty = 1)

# Latent variables
plot(1:nrow(melanoma), fit0$latent$M, xlab = "Index", ylab = "Number of competing causes", pch = 16)
plot(1:nrow(melanoma), fit0$latent$D, xlab = "Index", ylab = "Total damage", pch = 16)

Correlated destructive fit

fit <- cdnbcr(Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma)

summary(fit)
## Call:
## cdnbcr(formula = Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma)
## 
## Summary for residuals:
##       Mean Std. dev. Skewness Kurtosis
##   -0.00243   0.99307  0.00258  2.79037
## 
## ----------------------------------------------------------------
## Mean competing causes:
## ----------------------------------------------------------------
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.34771    1.31276  0.2649  0.79111  
## ulc          3.46189    1.42116  2.4360  0.01485 *
## 
## ----------------------------------------------------------------
## Activation probability:
## ----------------------------------------------------------------
## Coefficients:
##       Estimate Std. Error t value Pr(>|t|)
## thick  0.43044    0.36720  1.1722   0.2411
## 
## ----------------------------------------------------------------
## Mean lifetime:
## ----------------------------------------------------------------
## Coefficients:
##             Estimate Std. Error t value  Pr(>|t|)    
## (Intercept)  2.06594    0.32938  6.2723 3.559e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## phi: 5.562336 Std. Error: 3.545342
## alpha: 0.9771577 Std. Error: 0.04973692
## sigma: 2.850249 Std. Error: 0.8238118
## 
## In addition, Log-lik value: -205.2152 
## AIC: 424.4305 and BIC: 447.6915 
## EM iterations: 2339

Some diagnostic plots:

## Fitted median cure rate by ulc
theta_h <- fit$fitted$competing
phi_h <- fit$nuisance$phi
p_h <- fit$fitted$activation
alpha_h <- fit$nuisance$alpha

p0 <- cure_rate(theta_h, phi_h, p_h, alpha_h)

p0_absent <- median(p0[which(melanoma$ulc == 0)])
p0_present <- median(p0[which(melanoma$ulc == 1)])

# Graphically
mKM <- survfit(Surv(days, delta) ~ ulc, melanoma, se.fit = FALSE)
plot(mKM, col = c(2, 4))
abline(h = c(p0_absent, p0_present), col = c(2, 4), lty = c(2, 2))
legend("bottomleft", c("ulc: absent", "ulc: present"), col = c(2, 4), lty = 1)

# Quantile residuals
nrep <- 1000
mqresid <- NULL

# Populational survival function
Spop <- pcdnbcr(melanoma$days,
                fit$fitted$competing,
                fit$nuisance$phi,
                fit$fitted$activation,
                fit$nuisance$alpha,
                fit$fitted$mean,
                fit$nuisance$sigma, lower.tail = FALSE)

u <- melanoma$delta * (1 - Spop) + (1 - melanoma$delta) * runif(length(melanoma$days), 1 - Spop)
for (i in 1:1000) {
  qresid <- sort(qnorm(runif(u)))
  mqresid <- cbind(mqresid, qresid)
}

qresid <- apply(mqresid, 1, median)
qqnorm(qresid, pch = "+")
abline(0, 1, col = "grey", lwd = 2, lty = 1)

# Latent variables
plot(1:nrow(melanoma), fit$latent$M, xlab = "Index", ylab = "Number of competing causes", pch = 16)
plot(1:nrow(melanoma), fit$latent$D, xlab = "Index", ylab = "Total damage", pch = 16)

Likelihood ratio test

Consider the test of the following hypotheses

\[ \begin{align*} \textrm{H}_0: \alpha = 0 \quad \textrm{versus} \quad \textrm{H}_a: \alpha \neq 0. \end{align*} \]

We consider the following statistics \[ \Lambda = 2 \{\ell(\mathbf{\widehat{\psi}}) - \ell(\mathbf{\widetilde{\psi}})\}, \] where \(\mathbf{\widehat{\psi}}\) and \(\mathbf{\widetilde{\psi}}\) are the unrestricted and restricted (under the null hypothesis) maximum likelihood estimates of \(\psi = (\mathbf{\beta}_1^\top, \phi, \beta_2^\top, \alpha, \beta_3^\top, \sigma)^\top\).

Under mild regularity conditions, we have that, under \(\textrm{H}_0\), \[ \Lambda \overset{a}{\sim} 0.5 \; \chi^2_1 + 0.5 \; \chi^2_0. \]

## Test statistic
Lambda <- 2 * (fit$logLik - fit0$logLik)

## Summary
data.frame(Lambda = Lambda,
           Critical_value = qchisq(1 - 2 * 0.05, 1))
##     Lambda Critical_value
## 1 2.690858       2.705543