The General Hazards (GH) model is a hazard-based regression model that is able to incorporate effects that play a role at the hazard and time scales (see Rubio et al. (2019) and this post). The GH model is formulated in terms of the hazard function:
General Hazards model (GH). \[\begin{eqnarray}\label{GH} h^{\text{GH}}\left(t;{\bf {x}}\right) &=& h_0\left(t \exp({\bf \tilde{x}}^T\alpha)\right)\exp({\bf x}^T\beta),\\ H^{\text{GH}}\left(t;{\bf {x}}\right) &=& H_0\left(t \exp({\bf \tilde{x}}^T\alpha)\right)\exp(-{\bf \tilde{x}}^T\alpha+{\bf {x}}^T\beta),\nonumber \end{eqnarray}\] where \(h_0\) is the baseline hazard, \({\bf x}\) are the available covariates that are assumed to have an effect at the hazard level, and \({\bf \tilde{x}}\) are the covariates that are assumed to have a time dependent effect.
Rubio et al. (2019) and Alvares and Rubio (2021) explored the use of several parametric baseline hazards that can capture basic shapes of interest (increasing, decreasing, unimodal, bathtub). In this post, we will focus on the use of the Power Generalised Weibull distribution.
In this post, we will present an application of the GH model using Bayesian inference. For simplicity, we will use weakly informative priors for all of the parameters. This is, Gamma priors with parameters \((0.001,0.001)\) for the shape and scale parameters of the PGW baseline hazard, and normal priors with large variance for the regression parameters.
To sample from the posterior distribution, we will employ the adaptive Metropolis within Gibbs sampler implemented in the R package spBayes
.
We will analyse the Leuk
data set available in the R package spBayesSurv
. This data contains information about 1043 acute myeloid leukemia patients. We will consider the time dependent effect of the variable age
(\({\bf \tilde{x}}\)), and the hazard level effects of age
, sex
, wbc
, and tpi
(\({\bf x}\)).
rm(list = ls())
# Required packages
library(survival)
library(spBayesSurv)
library(spBayes)
#########################
# Some functions
#########################
#----------------------------------------------------------------------------------------
# Power Generalised Weibull
# https://rpubs.com/FJRubio/PGW
#----------------------------------------------------------------------------------------
# PGW Probability Density Function
dpgw <- function(t, sigma, nu, gamma, log = FALSE){
val <- log(nu) - log(gamma) - nu*log(sigma) + (nu-1)*log(t) +
(1/gamma - 1)*log( 1 + (t/sigma)^nu ) +
( 1 - ( 1 + (t/sigma)^nu )^(1/gamma) )
if(log) return(val) else return(exp(val))
}
# PGW Survival Function
spgw <- function(t, sigma, nu, gamma, log.p = FALSE){
val <- 1 - ( 1 + (t/sigma)^nu )^(1/gamma)
if(log.p) return(val) else return(exp(val))
}
# PGW Hazard Function
hpgw <- function(t, sigma, nu, gamma, log = FALSE){
val <- log(nu) - log(gamma) - nu*log(sigma) + (nu-1)*log(t) +
(1/gamma - 1)*log( 1 + (t/sigma)^nu )
if(log) return(val) else return(exp(val))
}
# PGW Cumulative Hazard Function
chpgw <- function(t, sigma, nu, gamma){
val <- -1 + ( 1 + (t/sigma)^nu )^(1/gamma)
return(val)
}
# Random Number Generation Function PGW distribution
rpgw <- function(n, sigma, nu, gamma){
p <- runif(n)
out <- sigma*( ( 1 - log(1-p) )^gamma - 1 )^(1/nu)
return(as.vector(out))
}
# Quantile Function
qpgw <- function(p, sigma, nu, gamma){
out <- sigma*( ( 1 - log(1-p) )^gamma - 1 )^(1/nu)
return(out)
}
#####################################################################################
# Data preparation
#----------------------------------------------------------------------------------------------
# Example: Leukemia data
#---------------------------------------------------------------------------------------------
data(LeukSurv)
?LeukSurv
head(LeukSurv)
## time cens xcoord ycoord age sex wbc tpi district
## 1 1 1 0.2050717 0.4972437 61 0 13.3 -1.96 9
## 2 1 1 0.2855568 0.8489526 76 0 450.0 -3.39 7
## 3 1 1 0.1764057 0.7364939 74 0 154.0 -4.95 7
## 4 1 1 0.2447630 0.2105843 79 1 500.0 -1.40 24
## 5 1 1 0.3274531 0.9073870 83 1 160.0 -2.59 7
## 6 1 1 0.6383682 0.3627343 81 1 30.4 0.03 11
n <- dim(LeukSurv)[1] # number of individuals
# Design matrices
des <- as.matrix(cbind(scale(LeukSurv$age), LeukSurv$sex, scale(LeukSurv$wbc), scale(LeukSurv$tpi) ))
colnames(des) <- cbind("std age", "sex", "wbc", "tpi")
des_t <- as.matrix(cbind(scale(LeukSurv$age)))
hist(LeukSurv$time)
p0 <- dim(des_t)[2]
p1 <- dim(des)[2]
# Required quantities
status <- as.logical(LeukSurv$cens)
times <- as.vector(LeukSurv$time)/365.24 # in years
times.obs <- times[status]
######################################################################################
# Log posterior
# Reparameterised (all positive parameters are transformed into the real line using logs)
log.post <- function(par){
ae0 <- exp(par[1]); be0 <- exp(par[2]); ce0 <- exp(par[3]); alpha <- par[4:(3+p0)]; beta <- par[(4+p0):(3+p0+p1)]
exp.x.alpha <- as.vector(exp(des_t%*%alpha))
exp.x.beta <- as.vector(exp(des%*%beta))
exp.x.beta.dif <- as.vector(exp( des%*%beta - des_t%*%alpha ))
exp.x.alpha.obs <- exp.x.alpha[status]
exp.x.beta.obs <- exp.x.beta[status]
haz0 <- hpgw(times.obs*exp.x.alpha.obs,ae0,be0,ce0)*exp.x.beta.obs
log.lik <- sum(log(haz0)) - sum(chpgw(times*exp.x.alpha,ae0,be0,ce0)*exp.x.beta.dif)
log.prior <- dgamma(ae0, 0.001, 0.001) + dgamma(be0, 0.001, 0.001) + dgamma(ce0, 0.001, 0.001) +
sum(dnorm(alpha, 0, 10, log = TRUE)) + sum(dnorm(beta, 0, 10, log = TRUE))
return(log.lik + log.prior)
}
# initial point
inits <- runif(3+p0+p1)
n.batch <- 5500
batch.length <- 10
# Running an adaptive Metropolis within Gibbs sampler
set.seed(1234)
chain <- adaptMetropGibbs(ltd=log.post, starting=inits, accept.rate=0.44, batch=n.batch,
batch.length=batch.length, report=100, verbose=FALSE)$p.theta.samples
# # Burning and thinning the chain
burn <- 5000
thin <- 50
NS <- n.batch*batch.length
index <- seq(burn,NS,thin)
# Chain after burn in and thinning
chainp <- chain[index,]
chainp[,1:3] <- exp(chainp[,1:3])
# Posterior samples (after thinning and burn-in period)
apply(chainp,2,summary)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## Min. 0.06329488 0.7556497 1.985804 0.4451875 0.7251643 -0.12642768 0.0971679
## 1st Qu. 0.09977925 0.8717710 2.782501 0.8679154 0.9547863 0.02930607 0.1932329
## Median 0.11437508 0.9095319 3.019720 0.9946386 1.0190529 0.07374610 0.2162032
## Mean 0.11715147 0.9112268 3.033603 1.0020126 1.0192800 0.07581248 0.2156966
## 3rd Qu. 0.13054313 0.9493093 3.262874 1.1251442 1.0786378 0.12517379 0.2381054
## Max. 0.24085580 1.1044515 4.329858 1.7075527 1.3271822 0.35091495 0.3454598
## [,8]
## Min. -0.03004780
## 1st Qu. 0.07489079
## Median 0.09691551
## Mean 0.09801399
## 3rd Qu. 0.11928696
## Max. 0.21249058
# Trace plots (after thinning and burn-in period)
par(mfrow = c(3,3))
for(i in 1:ncol(chainp)) plot(chainp[,i], type = "l", ylab = paste("Parameter",i))
par(mfrow=c(1,1))
# Histograms (after thinning and burn-in period)
par(mfrow = c(3,3))
for(i in 1:ncol(chainp)) hist(chainp[,i], probability = T,
main = paste("Parameter",i), xlab = paste("Parameter",i))
par(mfrow=c(1,1))
# Estimated marginal population survival function using NMC posterior samples
NMC = 1000
# Marginal survival function
msurv <- Vectorize(function(t){
temp <- vector()
for(j in 1:NMC){
exp.x.alphap <- as.vector(exp(des_t%*%chainp[j,3]))
exp.x.beta.difp <- as.vector(exp( des%*%chainp[j,(3+p0):(2+p0+p1)] - des_t%*%chainp[j,3] ))
temp[j] <- mean(exp(- chpgw(t*exp.x.alphap, chainp[j,1], chainp[j,2], chainp[j,3])*exp.x.beta.difp))
}
return(mean(temp))
})
# Comparison with the Kaplan Meier estimator
plot(survfit(Surv(times, status) ~ 1, data = LeukSurv),
xlab = "Time",
ylab = "Marginal survival probability")
curve(msurv,0,14, ylim = c(0,1), lty = 1, lwd = 2, add = T, col = "blue")
legend("topright", legend = c("GH Model", "Kaplan-Meier"), col = c("blue","black"), lwd = c(2,2) )
Alvares, D., and F. J. Rubio. 2021. “A Tractable Bayesian Joint Model for Longitudinal and Survival Data.” Statistics in Medicine 40 (19): 4213–29.
Rubio, F. J., L. Remontet, N. P. Jewell, and A. Belot. 2019. “On a General Structure for Hazard-Based Regression Models: An Application to Population-Based Cancer Research.” Statistical Methods in Medical Research 28: 2404–17.