Introduction

We will need these libraries.

suppressWarnings(library(nphsim))

Any arbitrary hazard can be approximated via a piecewise exponential distribution. For distributions of this form, the hazard remains constant over predefined periods. The hazard, cumulative hazard, survival and density functions can be produced as follows

hpw <- function(t = 2, bins = c(0,  20,  35), rate = c(1/15, 1/7, 1/20)){
  if(any(t < 0)) stop("Negative values for t not supported")
  y <- sapply(seq_along(t), function(i){
    rate[findInterval(t[i], bins)]  
  })
  y  
}
Hpw <- function(t = 2, bins = c(0,  20,  35), rate = c(1/15, 1/7, 1/20)){
  if(any(t < 0)) stop("Negative values for t not supported")
  y <- sapply(seq_along(t), function(i){
    idx <- findInterval(t[i], bins)
    if(idx>1){
      HH <- sum(diff(bins)[1:(idx-1)] * rate[1:(idx-1)])
      HH <- HH + (t[i] - bins[idx])*rate[idx]
    } else {
      HH <- (t[i] - bins[idx])*rate[idx]
    }
    HH
  })
  y  
}
Spw <- function(t = 2, bins = c(0,  20,  35), rate = c(1/15, 1/7, 1/20)){
  if(any(t < 0)) stop("Negative values for t not supported")
  exp(-Hpw(t, bins, rate))
}
fpw <- function(t = 2, bins = c(0,  20,  35), rate = c(1/15, 1/7, 1/20)){
  if(any(t < 0)) stop("Negative values for t not supported")
  hpw(t, bins, rate) * exp(-Hpw(t, bins, rate))
}

A couple of examples of using the above functions

par(mfrow = c(2, 2))
x <- seq(0, 100, len = 1000)
hh <- hpw(x)
plot(x, hh, type = "l", ylim = range(c(0, hh)), main = "Hazard")
abline(v = c(20, 35), lty = 2, lwd = 0.3)
HH <- Hpw(x)
plot(x, HH, type = "l", main = "Cum. Hazard")
abline(v = c(20, 35), lty = 2, lwd = 0.3)
SS <- Spw(x)
plot(x, SS, type = "l", main = "Survival")
abline(v = c(20, 35), lty = 2, lwd = 0.3)
ff <- fpw(x)
plot(x, ff, type = "l", main = "Density")
abline(v = c(20, 35), lty = 2, lwd = 0.3)

par(mfrow = c(1, 1))

Two-part piecewise exponential distribution example

bins = c(0,  20)
rate = c(1/20, 1/5)

par(mfrow = c(2, 2))
x <- seq(0, 100, len = 1000)
hh <- hpw(x, bins, rate)
plot(x, hh, type = "l", ylim = range(c(0, hh)), main = "Hazard")
abline(v = bins[2], lty = 2, lwd = 0.3)
HH <- Hpw(x, bins, rate)
plot(x, HH, type = "l", main = "Cum. Hazard")
abline(v = bins[2], lty = 2, lwd = 0.3)
SS <- Spw(x, bins, rate)
plot(x, SS, type = "l", main = "Survival")
abline(v = bins[2], lty = 2, lwd = 0.3)
ff <- fpw(x, bins, rate)
plot(x, ff, type = "l", main = "Density")
abline(v = bins[2], lty = 2, lwd = 0.3)

par(mfrow = c(1, 1))

In an earlier post inverse cdf sampling by root finding was introduced. Simulate random samples from the two-part piecewise exponential and then compare the distribution of survival times to the results from the external nphsim package.

# This controls the range to be searched. Needs to be > 0.
interval = c(1E-8, 1000)
# To get sensible values.
return_finite <- function(x){
  x <- min(x, .Machine$double.xmax)
  x <- max(x, -.Machine$double.xmax)
  x
}
# The function to be solved. Note that the log transform is used
# to give numerical stability.
rootfn <- function(t, u, bins, rate){
  lambda <- Hpw(t, bins, rate)
  surv <- exp(-lambda); 
  return_finite(log(surv) - log(u))
}
# Creates random draws
rpw <- function(bins, rate){
  u_i <- runif(1)
  # rootfn(t=interval[1], u = u_i)  rootfn(t=interval[2], u = u_i)
  at_limit <- rootfn(interval[2], u = u_i,
                          bins = bins,
                          rate = rate)
  if(at_limit > 0){
    return(c(interval[2], 0))
  } else {
    # trying to find the value for t at which S(t) - u = 0
    t_i <- stats::uniroot(rootfn, 
                          u = u_i, 
                          interval = interval, 
                          check.conv = TRUE,
                          trace = 100,
                          bins = bins,
                          rate = rate)$root
    # our random sample from the surv dist
    return(t_i)
  }
}

bins = c(0,  20)
rate = c(1/20, 1/4)
yy1 <- nphsim::rpwexp(1000, 
                      rate = rate,
                      intervals = bins[2])

yy2 <- replicate(1000, rpw(bins, rate))

Compare the distribution of survival times

hist(yy2, 
     probability = TRUE, 
     xlab = "xnew", 
     main = "Histogram of xnew", 
     xlim = c(0, 50),
     ylim = range(density(yy1)$y))
lines(density(yy1), col = 2, lwd = 2)