Reading the dataset and cleaning
data("melanoma", package = "timereg")
# Transforming the data
melanoma <- melanoma %>% mutate(days = days/365.25,
thick = thick / 100)
# Delta
melanoma$delta <- as.numeric(melanoma$status == 1)
# Selecting variables
melanoma <- melanoma %>% dplyr::select(ulc, thick, days, delta)
glimpse(melanoma)
## Rows: 205
## Columns: 4
## $ ulc <int> 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1~
## $ thick <dbl> 6.76, 0.65, 1.34, 2.90, 12.08, 4.84, 5.16, 12.88, 3.22, 7.41, 4.~
## $ days <dbl> 0.02737851, 0.08213552, 0.09582478, 0.27104723, 0.50650240, 0.55~
## $ delta <dbl> 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1~
Descriptive analysis
# Summary
skim(melanoma)
Data summary
| Name |
melanoma |
| Number of rows |
205 |
| Number of columns |
4 |
| _______________________ |
|
| Column type frequency: |
|
| numeric |
4 |
| ________________________ |
|
| Group variables |
None |
Variable type: numeric
| ulc |
0 |
1 |
0.44 |
0.50 |
0.00 |
0.00 |
0.00 |
1.00 |
1.00 |
▇▁▁▁▆ |
| thick |
0 |
1 |
2.92 |
2.96 |
0.10 |
0.97 |
1.94 |
3.56 |
17.42 |
▇▂▁▁▁ |
| days |
0 |
1 |
5.89 |
3.07 |
0.03 |
4.18 |
5.49 |
8.33 |
15.24 |
▃▇▅▂▁ |
| delta |
0 |
1 |
0.28 |
0.45 |
0.00 |
0.00 |
0.00 |
1.00 |
1.00 |
▇▁▁▁▃ |
# Plots
hist(melanoma$days, main = "", xlab = "Time (years)", ylab = "Density", prob = TRUE)
boxplot(days ~ ulc, melanoma, xlab = "Ulceration status", ylab = "Time (years)",
names = c("Absent", "Present"))
plot(days ~ thick, melanoma, pch = 16)
# Kaplan-Meier by ulceration
mKM <- survfit(Surv(days, delta) ~ ulc, melanoma, se.fit = FALSE)
plot(mKM, xlab = "Time (years)", ylab = "Surviving function", col = c(2, 4))
legend("bottomleft", c("ulc: absent", "ulc: present"), col = c(2, 4), lty = 1)




CDNBCR fit
Consider the following regression structure \[
\begin{align*}
\log(\theta_i) = \beta_{11}I(\texttt{ulc}_i ==
``\textrm{present}") \quad \textrm{and} \quad
\log\left(\dfrac{p_i}{1 - p_i}\right) =
\beta_{21} \texttt{thickness}_i\\
\end{align*}
\]
Uncorrelated destructive fit (alpha = 0)
fit0 <- cdnbcr(Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma, alpha = FALSE)
summary(fit0)
## Call:
## cdnbcr(formula = Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma,
## alpha = FALSE)
##
## Summary for residuals:
## Mean Std. dev. Skewness Kurtosis
## 0.00228 0.99203 0.00898 2.80513
##
## ----------------------------------------------------------------
## Mean competing causes:
## ----------------------------------------------------------------
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.37059 0.80002 -0.4632 0.643206
## ulc 2.68363 0.98916 2.7130 0.006667 **
##
## ----------------------------------------------------------------
## Activation probability:
## ----------------------------------------------------------------
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## thick 0.39352 0.33578 1.172 0.2412
##
## ----------------------------------------------------------------
## Mean lifetime:
## ----------------------------------------------------------------
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.9540 0.3048 6.4106 1.45e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## phi: 4.361524 Std. Error: 2.845666
## sigma: 2.369241 Std. Error: 0.5514378
##
## In addition, Log-lik value: -206.5607
## AIC: 425.1213 and BIC: 445.0594
## EM iterations: 1445
Some diagnostic plots:
## Fitted median cure rate by ulc
theta_h <- fit0$fitted$competing
phi_h <- fit0$nuisance$phi
p_h <- fit0$fitted$activation
alpha_h <- 0L
p0 <- cure_rate(theta_h, phi_h, p_h, alpha_h)
p0_absent <- median(p0[which(melanoma$ulc == 0)])
p0_present <- median(p0[which(melanoma$ulc == 1)])
# Graphically
mKM <- survfit(Surv(days, delta) ~ ulc, melanoma, se.fit = FALSE)
plot(mKM, col = c(2, 4))
abline(h = c(p0_absent, p0_present), col = c(2, 4), lty = c(2, 2))
legend("bottomleft", c("ulc: absent", "ulc: present"), col = c(2, 4), lty = 1)
# Quantile residuals
nrep <- 1000
mqresid <- NULL
# Populational survival function
Spop <- pcdnbcr(melanoma$days,
fit0$fitted$competing,
fit0$nuisance$phi,
fit0$fitted$activation,
0L,
fit0$fitted$mean,
fit0$nuisance$sigma, lower.tail = FALSE)
u <- melanoma$delta * (1 - Spop) + (1 - melanoma$delta) * runif(length(melanoma$days), 1 - Spop)
for (i in 1:1000) {
qresid <- sort(qnorm(runif(u)))
mqresid <- cbind(mqresid, qresid)
}
qresid <- apply(mqresid, 1, median)
qqnorm(qresid, pch = "+")
abline(0, 1, col = "grey", lwd = 2, lty = 1)
# Latent variables
plot(1:nrow(melanoma), fit0$latent$M, xlab = "Index", ylab = "Number of competing causes", pch = 16)
plot(1:nrow(melanoma), fit0$latent$D, xlab = "Index", ylab = "Total damage", pch = 16)




Correlated destructive fit
fit <- cdnbcr(Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma)
summary(fit)
## Call:
## cdnbcr(formula = Surv(days, delta) ~ ulc | thick - 1 | 1, data = melanoma)
##
## Summary for residuals:
## Mean Std. dev. Skewness Kurtosis
## -0.00243 0.99307 0.00258 2.79037
##
## ----------------------------------------------------------------
## Mean competing causes:
## ----------------------------------------------------------------
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.34771 1.31276 0.2649 0.79111
## ulc 3.46189 1.42116 2.4360 0.01485 *
##
## ----------------------------------------------------------------
## Activation probability:
## ----------------------------------------------------------------
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## thick 0.43044 0.36720 1.1722 0.2411
##
## ----------------------------------------------------------------
## Mean lifetime:
## ----------------------------------------------------------------
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.06594 0.32938 6.2723 3.559e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## phi: 5.562336 Std. Error: 3.545342
## alpha: 0.9771577 Std. Error: 0.04973692
## sigma: 2.850249 Std. Error: 0.8238118
##
## In addition, Log-lik value: -205.2152
## AIC: 424.4305 and BIC: 447.6915
## EM iterations: 2339
Some diagnostic plots:
## Fitted median cure rate by ulc
theta_h <- fit$fitted$competing
phi_h <- fit$nuisance$phi
p_h <- fit$fitted$activation
alpha_h <- fit$nuisance$alpha
p0 <- cure_rate(theta_h, phi_h, p_h, alpha_h)
p0_absent <- median(p0[which(melanoma$ulc == 0)])
p0_present <- median(p0[which(melanoma$ulc == 1)])
# Graphically
mKM <- survfit(Surv(days, delta) ~ ulc, melanoma, se.fit = FALSE)
plot(mKM, col = c(2, 4))
abline(h = c(p0_absent, p0_present), col = c(2, 4), lty = c(2, 2))
legend("bottomleft", c("ulc: absent", "ulc: present"), col = c(2, 4), lty = 1)
# Quantile residuals
nrep <- 1000
mqresid <- NULL
# Populational survival function
Spop <- pcdnbcr(melanoma$days,
fit$fitted$competing,
fit$nuisance$phi,
fit$fitted$activation,
fit$nuisance$alpha,
fit$fitted$mean,
fit$nuisance$sigma, lower.tail = FALSE)
u <- melanoma$delta * (1 - Spop) + (1 - melanoma$delta) * runif(length(melanoma$days), 1 - Spop)
for (i in 1:1000) {
qresid <- sort(qnorm(runif(u)))
mqresid <- cbind(mqresid, qresid)
}
qresid <- apply(mqresid, 1, median)
qqnorm(qresid, pch = "+")
abline(0, 1, col = "grey", lwd = 2, lty = 1)
# Latent variables
plot(1:nrow(melanoma), fit$latent$M, xlab = "Index", ylab = "Number of competing causes", pch = 16)
plot(1:nrow(melanoma), fit$latent$D, xlab = "Index", ylab = "Total damage", pch = 16)




Likelihood ratio test
Consider the test of the following hypotheses
\[
\begin{align*}
\textrm{H}_0: \alpha = 0 \quad \textrm{versus} \quad \textrm{H}_a:
\alpha \neq 0.
\end{align*}
\]
We consider the following statistics \[
\Lambda = 2 \{\ell(\mathbf{\widehat{\psi}}) -
\ell(\mathbf{\widetilde{\psi}})\},
\] where \(\mathbf{\widehat{\psi}}\) and \(\mathbf{\widetilde{\psi}}\) are the
unrestricted and restricted (under the null hypothesis) maximum
likelihood estimates of \(\psi =
(\mathbf{\beta}_1^\top, \phi, \beta_2^\top, \alpha, \beta_3^\top,
\sigma)^\top\).
Under mild regularity conditions, we have that, under \(\textrm{H}_0\), \[
\Lambda \overset{a}{\sim} 0.5 \; \chi^2_1 + 0.5 \; \chi^2_0.
\]
## Test statistic
Lambda <- 2 * (fit$logLik - fit0$logLik)
## Summary
data.frame(Lambda = Lambda,
Critical_value = qchisq(1 - 2 * 0.05, 1))
## Lambda Critical_value
## 1 2.690858 2.705543