practical6R.knit

UMIT course, PRACTICAL 6: RPSFTM

by Ian White and Nicholas Latimer
adaptation for R by Paul Schneider

# Function to install and load all the packages used in the course
install_n_load <- function(packages){
  for(package in packages){
    if(eval(parse(text=paste("require(",package,")")))==0) {
      install.packages(package)
    } 
    eval(parse(text=paste("require(",package,")")))
    
  } 
}
  required_packages = c("haven",
                        "psych",
                        "survival",
                        "flexsurv",
                        "ggplot2",
                        "cowplot",
                        "survminer",
                        "geepack",
                        "Hmisc",
                        "eha",
                        "rpsftm",
                        "rms")

# # clear work space from previous results
  rm(list = setdiff(ls(),c("install_n_load","required_packages")))
# # Load required packages
  install_n_load(required_packages)
  options(scipen = 99999)

Q1. Fit the “ever-treated” or “treatment-group” RPSFTM

Q1a

# # read data
novCEA = as.data.frame(read_dta("NovCEAevents.dta"))

Q1b

# create survival outcome
novCEA.surv = Surv(novCEA$deathtime,novCEA$deathind)

# create var for proportion time on treatment
novCEA$rx_prop = ifelse(novCEA$trtrand==1,1,0)
novCEA$rx_prop[novCEA$xoind==1] = with(novCEA[novCEA$xoind==1,], 1 - (xotime/deathtime))

# fit RPSFTM
rpsftm.fit = rpsftm(novCEA.surv ~ rand(arm = trtrand, rx= rx_prop),data = novCEA, test = survdiff)
rpsftm.fit

##   arm   rx.Min. rx.1st Qu. rx.Median   rx.Mean rx.3rd Qu.   rx.Max.
## 1   0 0.0000000  0.0000000 0.0000000 0.2485265  0.5377358 0.8242678
## 2   1 1.0000000  1.0000000 1.0000000 1.0000000  1.0000000 1.0000000
## Call:
## rpsftm(formula = novCEA.surv ~ rand(arm = trtrand, rx = rx_prop), 
##     data = novCEA, test = survdiff)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## .arm=0 108      103      103  0.000425  0.000754
## .arm=1 192      154      154  0.000285  0.000754
## 
##  Chisq= 0  on 1 degrees of freedom, p= 1 
## 
## psi: -0.5439109
## exp(psi): 0.5804736

# psi and 95% CI (again, differences in the 4th digit)
rpsftm.fit$psi

## [1] -0.5439109

rpsftm.fit$CI

## [1] -0.7492706 -0.3245246

Q1c

rpsftm.fit2 = rpsftm(novCEA.surv ~ rand(arm = trtrand, rx= rx_prop),data = novCEA, test = survdiff, censor_time=admin)
rpsftm.fit2

##   arm   rx.Min. rx.1st Qu. rx.Median   rx.Mean rx.3rd Qu.   rx.Max.
## 1   0 0.0000000  0.0000000 0.0000000 0.2485265  0.5377358 0.8242678
## 2   1 1.0000000  1.0000000 1.0000000 1.0000000  1.0000000 1.0000000
## Call:
## rpsftm(formula = novCEA.surv ~ rand(arm = trtrand, rx = rx_prop), 
##     data = novCEA, censor_time = admin, test = survdiff)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## .arm=0 108       94     94.2  0.000466  0.000754
## .arm=1 192      154    153.8  0.000285  0.000754
## 
##  Chisq= 0  on 1 degrees of freedom, p= 1 
## 
## psi: -0.5439109
## exp(psi): 0.5804736

# psi and 95% CI (again, differences in the 4th digit)
rpsftm.fit2$psi

## [1] -0.5439109

rpsftm.fit2$CI

## [1] -0.7492706 -0.3245246

Q2. Explore whether G-estimation has worked

Q2a

# Plot the graph of Z(psi) against psi
plot(rpsftm.fit2$eval_z, type="s",main="Ever-treated RPSFTM",ylab="log Z rank")
abline(h = c(1.96,-1.96), lty=2)
abline(h = 0,v = 0, lty=1)

Q2b: for Stata users only

Q2c: for Stata users only

Q2d: draw the fitted KM graphs

# construct a function to plot counter factual survivial times (also for later use)
plot.cf = function(mfit = rpsftm.fit2$fit,title=""){

rpsftm.df = data.frame(time = mfit$time, survival = mfit$surv, upper = mfit$upper, lower = mfit$lower)
rpsftm.df$group = factor( rep(1:2, mfit$strata), labels=c("Control","Treatment"))
rpsftm.df$model = "rpsftm: treatment free"

km.fit = survfit(novCEA.surv~novCEA$trtrand)
km.df = data.frame(time = km.fit$time, survival = km.fit$surv, upper = km.fit$upper, lower = km.fit$lower)
km.df$group = factor( rep(1:2, km.fit$strata), labels=c("Control","Treatment"))
km.df$model = "KM observed"

ggplot()+
  geom_step(data= rpsftm.df,aes(x = time, y = survival, group = group,lty=model,col=group)) +
  geom_step(data= km.df,aes(x = time, y = survival, group = group,lty=model,col=group))+ ggtitle(title)
} 
  
# plot
plot.cf(mfit = rpsftm.fit2$fit,title="")

NOTE: This paper reproduces the RPSFTM for the Concorde Trial, mentioned in the course-PPT, in R: https://journal.r-project.org/archive/2017/RJ-2017-068/RJ-2017-068.pdf

Q3 - Estimate the adjusted hazard ratio

# counterfactual untreated failure times.
S_psi  = rpsftm.fit2$Sstar
# index treated group
Stest <- novCEA$trtrand==1 
# replace counter factual with observed for the fully treated group
S_psi[Stest,] <- with(novCEA,Surv(deathtime,deathind))[Stest] 

# fit UNadjusted cox
cph_fit <- coxph(novCEA.surv ~ novCEA$trtrand) 
cph_fit

## Call:
## coxph(formula = novCEA.surv ~ novCEA$trtrand)
## 
##                   coef exp(coef) se(coef)      z         p
## novCEA$trtrand -0.5236    0.5924   0.1286 -4.071 0.0000469
## 
## Likelihood ratio test=15.88  on 1 df, p=0.00006735
## n= 300, number of events= 257

# And compare with adjusted Hazard Ratio
adj_cph_fit <- coxph(S_psi ~ novCEA$trtrand) 
adj_cph_fit

## Call:
## coxph(formula = S_psi ~ novCEA$trtrand)
## 
##                   coef exp(coef) se(coef)      z            p
## novCEA$trtrand -0.8014    0.4487   0.1420 -5.642 0.0000000169
## 
## Likelihood ratio test=30.47  on 1 df, p=0.00000003397
## n= 300, number of events= 248

# unadjusted 95% CI of the adjusted HR
data.frame(
  HR=exp(adj_cph_fit$coefficients),
lower=exp(confint(adj_cph_fit)[1]),
upper=exp(confint(adj_cph_fit)[2]))

##                       HR     lower     upper
## novCEA$trtrand 0.4487177 0.3396743 0.5927665

# adjusted 95% CI of the adjusted HR
# exp(ln(HR) +/- 1.96*(ln(HR)/Zstat)), with z= se of unadjusted HR
z <- coef(cph_fit)/sqrt(diag(vcov(cph_fit)))
data.frame(
  HR=exp(adj_cph_fit$coefficients),
lower=exp(adj_cph_fit$coefficients - 1.96*adj_cph_fit$coefficients/z),
upper=exp(adj_cph_fit$coefficients + 1.96*adj_cph_fit$coefficients/z))

##                       HR     lower     upper
## novCEA$trtrand 0.4487177 0.3050698 0.6600048

Q3+ Estimate adj CI using bootstrap

Bootstrapping adjusted CI

# bootstrapping
boot_rpfstm_ci = function(data=novCEA,iterations){
  n.rows = length(data[,1])
  boot.est = rep(NA,times=iterations)
  for(i in 1:iterations){
     cat("\r  ",i)
  data.b = data[sample(1:n.rows,size=n.rows,replace = T),]
  novCEA.surv.t = Surv(data.b$deathtime,data.b$deathind)
  rpsftm.fit.t = rpsftm(novCEA.surv.t ~ rand(arm = trtrand, rx= rx_prop),
                       data = data.b,
                       test = survdiff, 
                       censor_time = admin) 
  S_psi  = rpsftm.fit.t$Sstar
  Stest <- data.b$trtrand==1 
  S_psi[Stest,] <- with(data.b,Surv(deathtime,deathind))[Stest] 
  adj_cph_fit <- coxph(S_psi ~ data.b$trtrand) 
  boot.est[i] = adj_cph_fit$coefficients
  }
  ci = quantile(boot.est,c(0.05,0.95))
  est = mean(boot.est)
  res.df = data.frame(est= est,
                    lower = ci[1],
                    upper = ci[2])
  return(res.df)
}
boot.res = boot_rpfstm_ci(iterations = 10) # more iterations: more accurate, but slow!

boot.res

##          est      lower      upper
## 5% -0.739682 -0.8434899 -0.6095468

Q4 - Repeat the analysis in various different ways

Q4a - different g-tests

rpsftm.fit.alt1 = rpsftm(novCEA.surv ~ rand(arm = trtrand, rx= rx_prop),
                         data = novCEA,censor_time = admin, test = survreg)
rpsftm.fit.alt1

##   arm   rx.Min. rx.1st Qu. rx.Median   rx.Mean rx.3rd Qu.   rx.Max.
## 1   0 0.0000000  0.0000000 0.0000000 0.2485265  0.5377358 0.8242678
## 2   1 1.0000000  1.0000000 1.0000000 1.0000000  1.0000000 1.0000000
## 
## Coefficients:
## (Intercept) 
##     5.37699 
## 
## Scale= 0.6159772 
## n= 300 
## 
## psi: -0.5466217
## exp(psi): 0.5789022

#adj cox for alt1
S_psi.alt1  = rpsftm.fit.alt1$Sstar
S_psi.alt1[Stest,] <- with(novCEA,Surv(deathtime,deathind))[Stest] 
adj_cph_fit.alt1 <- coxph(S_psi.alt1 ~ novCEA$trtrand) 
adj_cph_fit.alt1

## Call:
## coxph(formula = S_psi.alt1 ~ novCEA$trtrand)
## 
##                   coef exp(coef) se(coef)      z            p
## novCEA$trtrand -0.8020    0.4484   0.1421 -5.646 0.0000000165
## 
## Likelihood ratio test=30.51  on 1 df, p=0.00000003323
## n= 300, number of events= 248

rpsftm.fit.alt2 = rpsftm(novCEA.surv ~ rand(arm = trtrand, rx= rx_prop),
                         data = novCEA,censor_time = admin ,test = coxph)
rpsftm.fit.alt2

##   arm   rx.Min. rx.1st Qu. rx.Median   rx.Mean rx.3rd Qu.   rx.Max.
## 1   0 0.0000000  0.0000000 0.0000000 0.2485265  0.5377358 0.8242678
## 2   1 1.0000000  1.0000000 1.0000000 1.0000000  1.0000000 1.0000000
## n= 300, number of events= 248
## 
## psi: -0.5440459
## exp(psi): 0.5803953

#adj cox for alt2
S_psi.alt2  = rpsftm.fit.alt2$Sstar
S_psi.alt2[Stest,] <- with(novCEA,Surv(deathtime,deathind))[Stest] 
adj_cph_fit.alt2 <- coxph(S_psi.alt2 ~ novCEA$trtrand) 
adj_cph_fit.alt2

## Call:
## coxph(formula = S_psi.alt2 ~ novCEA$trtrand)
## 
##                   coef exp(coef) se(coef)      z            p
## novCEA$trtrand -0.8014    0.4487   0.1420 -5.642 0.0000000169
## 
## Likelihood ratio test=30.47  on 1 df, p=0.00000003397
## n= 300, number of events= 248

Q4b - adjust for a covariate

# note we need to include the covariate in both rpsftm() and the subsequent Cox model
# with covariate
rpsftm.fit.covar = rpsftm(novCEA.surv ~ rand(arm = trtrand, rx= rx_prop)+
                            prognosisb,data = novCEA,censor_time = admin ,test = coxph)
rpsftm.fit.covar

##   arm   rx.Min. rx.1st Qu. rx.Median   rx.Mean rx.3rd Qu.   rx.Max.
## 1   0 0.0000000  0.0000000 0.0000000 0.2485265  0.5377358 0.8242678
## 2   1 1.0000000  1.0000000 1.0000000 1.0000000  1.0000000 1.0000000
##             coef exp(coef) se(coef)    z            p
## prognosisb 0.780     2.182    0.131 5.94 0.0000000028
## n= 300, number of events= 247
## 
## psi: -0.5469189
## exp(psi): 0.5787302

#adj cox for alt2
S_psi.covar  = rpsftm.fit.covar$Sstar
S_psi.covar[Stest,] <- with(novCEA,Surv(deathtime,deathind))[Stest] 
adj_cph_fit.covar <- coxph(S_psi.covar ~ novCEA$trtrand + novCEA$prognosisb) 
adj_cph_fit.covar

## Call:
## coxph(formula = S_psi.covar ~ novCEA$trtrand + novCEA$prognosisb)
## 
##                      coef exp(coef) se(coef)      z              p
## novCEA$trtrand    -0.8836    0.4133   0.1438 -6.144 0.000000000805
## novCEA$prognosisb  0.7548    2.1272   0.1311  5.758 0.000000008507
## 
## Likelihood ratio test=63.19  on 2 df, p=0.00000000000001895
## n= 300, number of events= 247

Q4c - sensitivity analysis to CTE assumption

novCEA$k = ifelse(novCEA$trtrand==1,1, 0.5) # effect modifier

rpsftm.fit.k05 = rpsftm(novCEA.surv ~ rand(arm = trtrand, rx= rx_prop),data = novCEA,censor_time = admin ,test = survdiff, treat_modifier = k)
rpsftm.fit.k05

##   arm   rx.Min. rx.1st Qu. rx.Median   rx.Mean rx.3rd Qu.   rx.Max.
## 1   0 0.0000000  0.0000000 0.0000000 0.2485265  0.5377358 0.8242678
## 2   1 1.0000000  1.0000000 1.0000000 1.0000000  1.0000000 1.0000000
## Call:
## rpsftm(formula = novCEA.surv ~ rand(arm = trtrand, rx = rx_prop), 
##     data = novCEA, censor_time = admin, test = survdiff, treat_modifier = k)
## 
##          N Observed Expected    (O-E)^2/E   (O-E)^2/V
## .arm=0 108      102      102 0.0000001106 0.000000195
## .arm=1 192      154      154 0.0000000732 0.000000195
## 
##  Chisq= 0  on 1 degrees of freedom, p= 1 
## 
## psi: -0.4437057
## exp(psi): 0.6416543

plot.cf(mfit = rpsftm.fit.k05$fit,title="Ever treated - effect after switching k = 0.5")

Q4d - “on treatment” model

–> on-treated analysis

# weird observations!
novCEA[novCEA$trtrand ==0 & !is.na(novCEA$disconexp) & novCEA$xoind==0,]

##  [1] BASELINE    id          prognosisb  CEAb        HRQLb       trtrand    
##  [7] PROGRESSION progtime    progind     CEAprog     EVENTS      xotime     
## [13] xoind       disconexp   deathtime   deathind    admin       rx_prop    
## [19] k          
## <0 rows> (or 0-length row.names)

# create proportion of time on treatment var
novCEA$tnowon = NA
ind1 = novCEA$trtrand ==1 & !is.na(novCEA$disconexp)
novCEA$tnowon[ind1] = novCEA$disconexp[ind1]
ind2 = novCEA$trtrand ==0 & novCEA$xoind==0
novCEA$tnowon[ind2] = 0
ind3 = novCEA$trtrand ==0 & !is.na(novCEA$disconexp) & novCEA$xoind==1
novCEA$tnowon[ind3] = novCEA$disconexp[ind3] - novCEA$xotime[ind3]

novCEA$ton_prop = novCEA$tnowon / novCEA$deathtime

# fit rpsftm
rpsftm.ton.fit = rpsftm(novCEA.surv ~ rand(arm = trtrand, rx= ton_prop),data = novCEA, test = survdiff,censor_time = admin,low_psi=-2,hi_psi = 1)

## Warning in root(qnorm(1 - alpha/2)): Multiple Roots found

rpsftm.ton.fit

##   arm   rx.Min. rx.1st Qu. rx.Median   rx.Mean rx.3rd Qu.   rx.Max.
## 1   0 0.0000000  0.0000000 0.0000000 0.1785152  0.3589891 0.7029289
## 2   1 0.2937063  0.5165921 0.6153846 0.6551200  0.7304348 1.0000000
## Call:
## rpsftm(formula = novCEA.surv ~ rand(arm = trtrand, rx = ton_prop), 
##     data = novCEA, censor_time = admin, test = survdiff, low_psi = -2, 
##     hi_psi = 1)
## 
##          N Observed Expected   (O-E)^2/E  (O-E)^2/V
## .arm=0 108       61       61 0.000000978 0.00000157
## .arm=1 192      101      101 0.000000590 0.00000157
## 
##  Chisq= 0  on 1 degrees of freedom, p= 1 
## 
## psi: -1.084247
## exp(psi): 0.3381563

### plot
plot.cf(mfit = rpsftm.ton.fit$fit,title="On-Treatment analysis")

exp(rpsftm.ton.fit$psi)

## [1] 0.3381563

rpsftm.ton.fit$CI

## [1] -1.5315857 -0.5432385

Stest <- novCEA$trtrand==1 
S_psi.ton  = rpsftm.ton.fit$Sstar
S_psi.ton[Stest,] <- with(novCEA,Surv(deathtime,deathind))[Stest] 
adj_cph_fit.ton <- coxph(S_psi.ton ~ novCEA$trtrand) 

exp(adj_cph_fit.ton$coefficients)

## novCEA$trtrand 
##      0.4542838