Case study I: Lung cancer data
This document presents an analysis of Near Redundancy and Practical
non-identifiability using the Lung cancer data set from the R package
survival. For more information see Rubio et al. (2019) and Espindola, Montoya, and Rubio (2023).
See the GitHub repository: Near Redundancy and
Practical non-identifiability: Survival Models
Acknolewdgement: We thank Mr. Ching Choy for
pointing out a bug in the bootstrap for the Hessian method. This bug
produced a classification probability \(\approx 1\), while the correct value is
\(\approx 0.7\), as shown in the
section “Bootstrap for distance-based criteria and Hessian method’’. The
conclusions are similar for the new corrected values, but it is now
clear that the Hessian method has a lower power in detecting inferential
problems than that of the ad hoc distance-based methods, as
expected.
Data preparation
########################################################################
# Case study I: Lung cancer data set from the survival R package
########################################################################
# See README file for more information
rm(list=ls())
# Routines
# https://github.com/FJRubio67/NRPNISurv
source("routines.R")
# Required packages (from CRAN)
# install.packages('PackageName')
library(knitr)
library(numDeriv)
library(survival)
library(flexsurv)
# Required packages (from GitHub)
#library(devtools)
#install_github("FJRubio67/HazReg")
library(HazReg)
##
## Attaching package: 'HazReg'
## The following objects are masked from 'package:flexsurv':
##
## hgamma, hllogis, hlnorm, hweibull, qllogis
#----------------------------------------------------------------------------------------------
# Lung cancer data
#---------------------------------------------------------------------------------------------
# data set
attach(lung)
head(lung)
## inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 3 306 2 74 1 1 90 100 1175 NA
## 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 1010 1 56 1 0 90 90 NA 15
## 4 5 210 2 57 1 1 90 60 1150 11
## 5 1 883 2 60 1 0 100 90 NA 0
## 6 12 1022 1 74 1 1 50 80 513 0
#******************
# Data preparation
#******************
# Removing a missing observation
dat <- lung[-14,]
# design matrix (hazard scale)
x <- as.matrix( cbind(scale(dat$age), dat$sex - 1, scale(dat$ph.ecog) ))
# sample size
n <- nrow(x)
# design matrix (time scale)
xt <- as.matrix(x[,1])
# vital status
status <- dat$status - 1
# Survival times
times <- dat$time/365.25
times <- ifelse(times==0,1/365.25,times)
Maximum likelihood estimation
#----------------------------------------------------------------------------------------------
# Maximum likelihood estimation
# See : https://github.com/FJRubio67/HazReg
#----------------------------------------------------------------------------------------------
# PGW model with no covariates
OPTPGW0 <- GHMLE(init = c(0,0,0), times = times, status = status,
hstr = "baseline", dist = "PGW", method = "nlminb", maxit = 10000)
# PGW-PH model
OPTPGWPH <- GHMLE(init = c(OPTPGW0$OPT$par*0, rep(0,ncol(x))), times = times, status = status,
hstr = "PH", dist = "PGW", des = x, method = "nlminb", maxit = 10000)
# PGW-AFT model
OPTPGWAFT <- GHMLE(init = c(OPTPGW0$OPT$par,rep(0,ncol(x))), times = times, status = status,
hstr = "AFT", dist = "PGW", des = x, method = "nlminb", maxit = 10000)
# PGW-GH model
OPTPGWGH <- GHMLE(init = c(OPTPGW0$OPT$par, rep(0, ncol(xt)), rep(0,ncol(x))), times = times, status = status,
hstr = "GH", dist = "PGW", des = x, des_t = xt, method = "nlminb", maxit = 10000)
# MLE PGW no covariates
MLE.PGW0 <- exp(OPTPGW0$OPT$par[1:3])
kable(MLE.PGW0, digits = 3)
# MLE PGW-PH structure
MLE.PGWPH <- c(exp(OPTPGWPH$OPT$par[1:3]), OPTPGWPH$OPT$par[-c(1:3)])
kable(MLE.PGWPH, digits = 3)
| 1.310 |
| 1.286 |
| 0.769 |
| 0.093 |
| -0.547 |
| 0.335 |
# MLE PGW-AFT structure
MLE.PGWAFT <- c(exp(OPTPGWAFT$OPT$par[1:3]), OPTPGWAFT$OPT$par[-c(1:3)])
kable(MLE.PGWAFT, digits = 3)
| 0.803 |
| 1.439 |
| 1.210 |
| 0.070 |
| -0.415 |
| 0.251 |
# MLE PGW-GH structure
MLE.PGWGH <- c(exp(OPTPGWGH$OPT$par[1:3]), OPTPGWGH$OPT$par[-c(1:3)])
kable(MLE.PGWGH, digits = 3)
| 1.194 |
| 1.314 |
| 0.861 |
| -1.479 |
| 0.681 |
| -0.553 |
| 0.330 |
Model selection
# AIC
AIC.PGW0 <- 2*OPTPGW0$OPT$objective + 2*length(MLE.PGW0)
AIC.PGWGH <- 2*OPTPGWGH$OPT$objective + 2*length(MLE.PGWGH)
AIC.PGWAFT <- 2*OPTPGWAFT$OPT$objective + 2*length(MLE.PGWAFT)
AIC.PGWPH <- 2*OPTPGWPH$OPT$objective + 2*length(MLE.PGWPH)
# Best model: PGW-PH
AICs <- c(AIC.PGW0, AIC.PGWPH, AIC.PGWAFT, AIC.PGWGH)
AICs
## [1] 365.2058 341.1689 341.2499 342.3572
which.min(AICs)
## [1] 2
Detecting Near Redundancy
######################################################################
# KL Divergence criterion
######################################################################
# KL Divergence
dKL <- minKL(MLE.PGWGH[1:3], c(1,1))$min.KL
dKL
## [1] 0.0003325461
# Redundancy constant
k = length(MLE.PGWGH)
M = 0.05
ne <- n - 0.5*sum(!status)
UKL <- 0.5*k*M*length(MLE.PGWGH)*log(ne)/ne
# Test
(dKL < UKL)
## [1] TRUE
######################################################################
# Hellinger distance criterion
######################################################################
# Hellinger Distance
DH <- minHell(MLE.PGWGH[1:3], c(1,1))$min.dHell
DH
## [1] 0.009597613
# Hellinger criterion
kappa <- 0.05
UH <- log( 1 - (1-2*kappa)^2)/(2*log(1-DH^2))
# Test
(ne < UH)
## [1] TRUE
######################################################################
# Hessian Method
######################################################################
# Hessian/Eigenvalues
HESS.PGW <- -hessian(OPTPGWGH$log_lik, x = OPTPGWGH$OPT$par)
eigen.val <- eigen(HESS.PGW)$values
ref.val <- abs(as.vector(eigen.val))/max(abs(as.vector(eigen.val)))
sev <- sort(ref.val)
sev
## [1] 6.469856e-05 1.187159e-02 2.493241e-02 3.186205e-02 1.051428e-01
## [6] 5.554996e-01 1.000000e+00
# Upper bound
Un <- 0.001
# Test
(sev[1] < Un)
## [1] TRUE
Bootstrap for distance-based criteria and Hessian method
#######################################################################################
# Bootstrap
#######################################################################################
# Number of Boostrap samples
B <- 1000
# Bootstrap MLEs
MLE.B <- matrix(0, ncol = length(MLE.PGWGH), nrow = B)
neB <- UKLB <- UHB <- DHB<- DKLB <- vector()
indHess <- vector()
for(i in 1:B){
ind <- sample(1:n, replace = T)
OPTB <- GHMLE(init = c(0,0,0, rep(0, ncol(xt)), rep(0,ncol(x))), times = times[ind], status = status[ind],
hstr = "GH", dist = "PGW", des = x[ind,], des_t = xt[ind,], method = "nlminb", maxit = 10000)
MLE.B[i,] <- c(exp(OPTB$OPT$par[1:3]), OPTB$OPT$par[-c(1:3)])
neB[i] <- n - 0.5*sum(!status[ind])
UKLB[i] <- 0.5*k*M*length(MLE.B[i,])*log(neB[i])/neB[i]
# Hessian/Eigenvalues
HESSB <- -hessian(OPTB$log_lik, x = OPTB$OPT$par)
eigen.val <- eigen(HESSB)$values
ref.val <- abs(as.vector(eigen.val))/max(abs(as.vector(eigen.val)))
sev <- sort(ref.val)
# Test
indHess[i] <- as.numeric(sev[1] < Un)
}
######################################################################
# Bootstrap KL Divergence criterion
######################################################################
# Bootstrap KL Divergence
for(i in 1:B){
if(MLE.B[i,3] < 0.01) DKLB[i] <- NA
if(MLE.B[i,3] >= 0.01) DKLB[i] <- minKL(MLE.B[i,1:3], c(1,1))$min.KL
}
# Bootstrap probability of KL divergence criterion
indKLB <- (DKLB < UKLB)
mean(indKLB, na.rm = TRUE)
## [1] 0.9947808
######################################################################
# Bootstrap Hellinger distance criterion
######################################################################
# Hellinger Distance
for(i in 1:B){
DHB[i] <- minHell(MLE.B[i,1:3], c(1,1))$min.dHell
# Hellinger criterion
UHB[i] <- log( 1 - (1-2*kappa)^2)/(2*log(1-DHB[i]^2))
}
# Bootstrap probability of Hellinger distance criterion
indHB <- (neB < UHB)
mean(indHB)
## [1] 0.903
######################################################################
# Bootstrap Hessian method
######################################################################
# Bootstrap probability of KL divergence criterion
mean(indHess)
## [1] 0.669
Detecting Practical Non-Identifiability
#######################################################################################
# Profile likelihood
#######################################################################################
p0 <- ncol(xt)
p1 <- ncol(x)
p <- length(MLE.PGWGH) # number of parameters
ML <- OPTPGWGH$OPT$objective # Maximum likelihood
# Profile likelihood function for parameter "ind"
prof.lik <- function(par1, ind){
tempf <- function(par){
tempv <- rep(0,p)
tempv <- replace(x = tempv, c(1:p)[-ind] , par)
tempv <- replace(x = tempv, ind , par1)
out0 <- OPTPGWGH$log_lik(tempv)
return(out0)
}
out <- -nlminb(OPTPGWGH$OPT$par[-ind]*0,tempf, control = list(iter.max = 10000))$objective + ML
out2 <- ifelse(exp(out)<=1, exp(out), 0)
return(out2)
}
# Profile likelihoods
# Profile likelihood of Parameter 1
prof1 <- Vectorize(function(par) prof.lik(log(par),1))
curve(prof1,0.01,7 , n = 500, lwd = 2, xlab = expression(sigma), ylab = "Profile Likelihood")

# Profile likelihood of Parameter 2
prof2 <- Vectorize(function(par) prof.lik(log(par),2))
curve(prof2,0.8,2 , n = 500, lwd = 2, xlab = expression(nu), ylab = "Profile Likelihood")

# Profile likelihood of Parameter 3
prof3 <- Vectorize(function(par) prof.lik(log(par),3))
curve(prof3,0.25,2.5 , n = 200, lwd = 2, xlab = expression(gamma), ylab = "Profile Likelihood")

# Profile likelihood of Parameter 4
prof4 <- Vectorize(function(par) prof.lik(par,4))
curve(prof4,-5,5, n = 500, lwd = 2, xlab = expression(alpha[1]), ylab = "Profile Likelihood",
ylim = c(0,1))

# Profile likelihood of Parameter 5
prof5 <- Vectorize(function(par) prof.lik(par,5))
curve(prof5,-2,2 , n = 200, lwd = 2, xlab = expression(beta[1]), ylab = "Profile Likelihood",
ylim = c(0,1))

# Profile likelihood of Parameter 6
prof6 <- Vectorize(function(par) prof.lik(par,6))
curve(prof6,-1.1, -0.1 , n = 200, lwd = 2, xlab = expression(beta[2]), ylab = "Profile Likelihood")

# Profile likelihood of Parameter 7
prof7 <- Vectorize(function(par) prof.lik(par,7))
curve(prof7,0,0.65, n = 200, lwd = 2, xlab = expression(beta[3]), ylab = "Profile Likelihood")

Alternative models
################################################
# Exponentiated Weibull model
################################################
# Exponentiated Weibull
OPTEWGH <- GHMLE(init = OPTPGWGH$OPT$par*0.1, times = times, status = status,
hstr = "GH", dist = "EW", des = x, des_t = xt, method = "nlminb", maxit = 10000)
# MLE EW-GH structure
MLE.EWGH <- c(exp(OPTEWGH$OPT$par[1:3]), OPTEWGH$OPT$par[-c(1:3)])
kable(MLE.EWGH, digits = 3)
| 1.180 |
| 1.653 |
| 0.750 |
| -0.769 |
| 0.430 |
| -0.573 |
| 0.335 |
AIC.EWGH <- 2*OPTEWGH$OPT$objective + 2*length(MLE.EWGH)
AIC.EWGH
## [1] 341.7245
################################################
# Weibull models (Simpler identifiable model)
################################################
# Optimisation
OPTW0 <- GHMLE(init = c(0,0), times = times, status = status,
hstr = "baseline", dist = "Weibull", method = "nlminb", maxit = 10000)
OPTWPH <- GHMLE(init = c(OPTW0$OPT$par*0, rep(0,ncol(x))), times = times, status = status,
hstr = "PH", dist = "Weibull", des = x, method = "nlminb", maxit = 10000)
OPTWAFT <- GHMLE(init = c(OPTW0$OPT$par*0,rep(0,ncol(x))), times = times, status = status,
hstr = "AFT", dist = "Weibull", des = x, method = "nlminb", maxit = 10000)
# MLE W no covariates
MLE.W0 <- exp(OPTW0$OPT$par[1:2])
kable(MLE.W0, digits = 3)
# MLE W-PH structure
MLE.WPH <- c(exp(OPTWPH$OPT$par[1:2]), OPTWPH$OPT$par[-c(1:2)])
kable(MLE.WPH, digits = 3)
| 0.984 |
| 1.368 |
| 0.093 |
| -0.549 |
| 0.333 |
# MLE W-AFT structure
MLE.WAFT <- c(exp(OPTWAFT$OPT$par[1:2]), OPTWAFT$OPT$par[-c(1:2)])
kable(MLE.WAFT, digits = 3)
| 0.984 |
| 1.368 |
| 0.068 |
| -0.401 |
| 0.244 |
# AIC
AIC.W0 <- 2*OPTW0$OPT$objective + 2*length(MLE.W0)
AIC.WAFT <- 2*OPTWAFT$OPT$objective + 2*length(MLE.WAFT)
AIC.WPH <- 2*OPTWPH$OPT$objective + 2*length(MLE.WPH)
# Best model: AFT-W
c(AIC.W0, AIC.WPH, AIC.WAFT)
## [1] 363.4652 339.4866 339.4866
Espindola, J. A., J. A. Montoya, and F. J. Rubio. 2023. “On Near
Redundancy and Identifiability of Parametric Hazard Regression Models
Under Censoring.” Biometrical Journal 65: 2300006.
Rubio, F. J., L. Remontet, N. P. Jewell, and A. Belot. 2019. “On a
General Structure for Hazard-Based Regression Models: An Application to
Population-Based Cancer Research.” Statistical Methods in
Medical Research 28: 2404–17.