Joint model with longitudinal and survival data

Lattice plot for data
library(JM)
## Warning: package 'JM' was built under R version 4.4.3
## Loading required package: MASS
## Loading required package: nlme
## Loading required package: splines
## Loading required package: survival
lattice::xyplot(log(serBilir) ~ year | id, group = id, 
       data = pbc2[pbc2$id %in% c(1:10),], 
       xlab = "years", ylab = expression(log("serBilir")), col = 1, type = "b")

Cox model with longitudinal data
library(tidyverse)
## Warning: package 'ggplot2' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::collapse() masks nlme::collapse()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ dplyr::lag()      masks stats::lag()
## ✖ dplyr::select()   masks MASS::select()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# data transform
pbc2_new=pbc2
pbc2_new$start=pbc2_new$year
pbc2_new=pbc2_new %>%
  arrange(id, year) %>%   
  group_by(id) %>%
  mutate(stop = lead(year, n = 1))  
pbc2_new=pbc2_new %>% mutate(event=ifelse(is.na(stop), status2,0 ), event_cr=ifelse(is.na(stop), status,1 ),stop = ifelse(is.na(stop), years, stop))
pbc2_new$event_cr=pbc2_new$event_cr-1
# cox
coxph=coxph(Surv(start,stop,event) ~ drug+ log(serBilir) +sex, data = pbc2_new, x = TRUE,model = TRUE) # event is death
coxph
## Call:
## coxph(formula = Surv(start, stop, event) ~ drug + log(serBilir) + 
##     sex, data = pbc2_new, model = TRUE, x = TRUE)
## 
##                   coef exp(coef) se(coef)      z      p
## drugD-penicil -0.02247   0.97778  0.17379 -0.129  0.897
## log(serBilir)  1.29019   3.63349  0.08533 15.120 <2e-16
## sexfemale     -0.30490   0.73720  0.22254 -1.370  0.171
## 
## Likelihood ratio test=288.6  on 3 df, p=< 2.2e-16
## n= 1945, number of events= 140
table(pbc2_new$event_cr)
## 
##    0    1    2 
## 1776   29  140
# FGR (Hist(start,stop, event_cr)~drug+ log(serBilir) +sex, data = pbc2_new)   #competing risk model doesn't work in this form
# Error in Hist(start, stop, event_cr) : 

Proportional-hazards (PH) assumption

test.ph <- cox.zph(coxph)
test.ph
##               chisq df     p
## drug          0.584  1 0.445
## log(serBilir) 4.386  1 0.036
## sex           0.572  1 0.450
## GLOBAL        5.247  3 0.155
# log(serBilir) not meet the ph assumption

Updated cox model time dependent

fit.ph<-coxph(Surv(start,stop,event) ~ drug+ log(serBilir) +sex +tt(log(serBilir)),
              tt=function(x,t,...) x*log(t+20 ),data=pbc2_new)
summary(fit.ph)
## Call:
## coxph(formula = Surv(start, stop, event) ~ drug + log(serBilir) + 
##     sex + tt(log(serBilir)), data = pbc2_new, tt = function(x, 
##     t, ...) x * log(t + 20))
## 
##   n= 1945, number of events= 140 
## 
##                        coef exp(coef)  se(coef)      z Pr(>|z|)   
## drugD-penicil       0.02494   1.02526   0.17500  0.143  0.88666   
## log(serBilir)       5.98033 395.57166   2.12445  2.815  0.00488 **
## sexfemale          -0.30194   0.73938   0.22258 -1.357  0.17493   
## tt(log(serBilir))  -1.46255   0.23164   0.65989 -2.216  0.02667 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## drugD-penicil        1.0253   0.975366   0.72757 1.445e+00
## log(serBilir)      395.5717   0.002528   6.15022 2.544e+04
## sexfemale            0.7394   1.352481   0.47797 1.144e+00
## tt(log(serBilir))    0.2316   4.316968   0.06355 8.443e-01
## 
## Concordance= 0.865  (se = 0.015 )
## Likelihood ratio test= 293.5  on 4 df,   p=<2e-16
## Wald test            = 227.4  on 4 df,   p=<2e-16
## Score (logrank) test = 342.8  on 4 df,   p=<2e-16
# for sex
plot(survfit(Surv(start, stop, event) ~ sex, data = pbc2_new),
     xlab = "Time",
     ylab = "Survival probability")

# for drug
plot(survfit(Surv(start, stop, event) ~ drug, data = pbc2_new),
     xlab = "Time",
     ylab = "Survival probability")

Joint model with longitudinal and survival data
# two datasets
# View(pbc2)  #long format , outcome is log serum bilirubin 
# View(pbc2.id) #baseline data, time to event data , event is death
lmeFit <-   
  # lme(log(serBilir) ~ drug * ns(year, 3), 
  #                           random = list(id = pdDiag(form = ~ ns(year, 3))), data = pbc2)
  lme(log(serBilir) ~ drug * year, random = ~ year | id, data = pbc2)
coxFit <- coxph(Surv(years, status2) ~ drug+sex, data = pbc2.id, x = TRUE)
# to fit this model we need to specify the 'derivForm' argument, which is a list
# with first component the derivative of the fixed-effects formula of 'lmeFit' with
# respect to 'obstime', second component the indicator of which fixed-effects 
# coefficients correspond to the previous defined formula, third component the 
# derivative of the random-effects formula of 'lmeFit' with respect to 'obstime', 
# and fourth component the indicator of which random-effects correspond to the 
# previous defined formula
dForm <- list(fixed = ~ 1 + drug, indFixed = c(2, 4), random = ~ 1, indRandom = 2)
jointModel=jointModel(lmeFit, coxFit, timeVar = "year", method = "spline-PH-aGH",
           parameterization = "both", derivForm = dForm)
# jointModel=jointModel(lmeFit, coxFit, timeVar = "year", method = "spline-PH-aGH")
jointModel
## 
## Call:
## jointModel(lmeObject = lmeFit, survObject = coxFit, timeVar = "year", 
##     parameterization = "both", method = "spline-PH-aGH", derivForm = dForm)
## 
## Variance Components:
## $`Random-Effects`
##              StdDev    Corr
## (Intercept)  0.9938        
## year         0.1909  0.5037
## 
## $Residual
## Longitudinal 
##       0.3472 
## 
## 
## Coefficients:
## $`Longitudinal Process`
##        (Intercept)      drugD-penicil               year drugD-penicil:year 
##             0.5548            -0.1361             0.2015            -0.0051 
## 
## $`Event Process`
## drugD-penicil     sexfemale        Assoct      Assoct.s           bs1 
##        0.0071       -0.3174        0.9982        3.3487       -3.4419 
##           bs2           bs3           bs4           bs5           bs6 
##       -4.1585       -3.2205       -3.6579       -3.3925       -3.2506 
##           bs7           bs8           bs9 
##       -0.8253       -6.8037       -1.4782 
## 
## 
## log-Lik: -1907.276
exp(confint(jointModel, parm = "Event"))
##                   2.5 %       est.     97.5 %
## drugD-penicil 0.6866487  1.0071361   1.477208
## sexfemale     0.4483839  0.7280572   1.182173
## Assoct        2.1115294  2.7134648   3.486994
## Assoct.s      3.4841545 28.4671129 232.589143
Perfromance of model
ND <- pbc2[pbc2$id == 2, ] # the data of Subject 2
head(ND,20)
##    id    years status      drug      age    sex      year ascites hepatomegaly
## 3   2 14.15234  alive D-penicil 56.44782 female 0.0000000      No          Yes
## 4   2 14.15234  alive D-penicil 56.44782 female 0.4983025      No          Yes
## 5   2 14.15234  alive D-penicil 56.44782 female 0.9993429      No          Yes
## 6   2 14.15234  alive D-penicil 56.44782 female 2.1027270      No          Yes
## 7   2 14.15234  alive D-penicil 56.44782 female 4.9008871     Yes          Yes
## 8   2 14.15234  alive D-penicil 56.44782 female 5.8892783     Yes          Yes
## 9   2 14.15234  alive D-penicil 56.44782 female 6.8858833     Yes          Yes
## 10  2 14.15234  alive D-penicil 56.44782 female 7.8907020     Yes          Yes
## 11  2 14.15234  alive D-penicil 56.44782 female 8.8325485     Yes          Yes
##    spiders                   edema serBilir serChol albumin alkaline  SGOT
## 3      Yes                No edema      1.1     302    4.14     7395 113.5
## 4      Yes                No edema      0.8      NA    3.60     2107 139.5
## 5      Yes                No edema      1.0      NA    3.55     1711 144.2
## 6      Yes                No edema      1.9      NA    3.92     1365 144.2
## 7      Yes      edema no diuretics      2.6     230    3.32     1110 131.8
## 8      Yes edema despite diuretics      3.6      NA    2.92      996 131.8
## 9      Yes edema despite diuretics      4.2      NA    2.73      860 145.7
## 10     Yes edema despite diuretics      3.6     244    2.80      779 119.0
## 11     Yes edema despite diuretics      4.6     237    2.67      669  88.0
##    platelets prothrombin histologic status2
## 3        221        10.6          3       0
## 4        188        11.0          3       0
## 5        161        11.6          3       0
## 6        122        10.6          3       0
## 7        135        11.3          3       0
## 8        100        11.5          3       0
## 9        103        11.5          3       0
## 10       113        11.5          3       0
## 11       100        11.5          3       0
plot(rocJM(jointModel, dt = c(1, 9,  10), ND, idVar = "id"))

# dt: a numeric vector indicating the lengths of the time intervals of primary interest
# within which we want to distinguish between subjects who died within the
# intervals from subjects who survived longer than that.
aucJM(jointModel, pbc2, Tstart = 1, Thoriz = 8)
## 
##  Time-dependent AUC for the Joint Model jointModel
## 
## Estimated AUC: 0.8628
## At time: 8
## Using information up to time: 1 (290 subjects still at risk)
Predict and plot

Prediction

set.seed(300716) # it uses Monte Carlo samples
preds <- survfitJM(jointModel, newdata = pbc2[pbc2$id %in% c("2"), ],
                   idVar = "id")  # last.time = "Time"
preds
## 
## Prediction of Conditional Probabilities of Event
##  based on 200 Monte Carlo samples
## 
## $`2`
##      times   Mean Median  Lower  Upper
## 1   8.8325 1.0000 1.0000 1.0000 1.0000
## 1   8.9405 0.9777 0.9802 0.9393 0.9918
## 2   9.3609 0.8831 0.8941 0.7198 0.9579
## 3   9.7813 0.7837 0.7981 0.5422 0.9198
## 4  10.2017 0.6946 0.7063 0.4118 0.8848
## 5  10.6221 0.6265 0.6299 0.3251 0.8584
## 6  11.0425 0.5786 0.5827 0.2687 0.8413
## 7  11.4629 0.5449 0.5534 0.2309 0.8305
## 8  11.8833 0.5205 0.5284 0.1918 0.8134
## 9  12.3037 0.5000 0.5080 0.1530 0.7867
## 10 12.7241 0.4830 0.4887 0.1143 0.7653
## 11 13.1445 0.4655 0.4733 0.0885 0.7448
## 12 13.5649 0.4375 0.4299 0.0712 0.7363
## 13 13.9853 0.3781 0.3790 0.0280 0.6960
## 14 14.4057 0.1302 0.0949 0.0000 0.4631
# 
survfitJM(jointModel, newdata = pbc2[pbc2$id %in% c("2"), ],
          idVar = "id",
          survTimes = c(8, 10, 12))  # you can specify the times
## 
## Prediction of Conditional Probabilities of Event
##  based on 200 Monte Carlo samples
## 
## $`2`
##     times   Mean Median  Lower  Upper
## 1  8.8325 1.0000 1.0000 1.0000 1.0000
## 1 10.0000 0.7550 0.7795 0.4691 0.9096
## 2 12.0000 0.5572 0.5805 0.1750 0.8427

Plot

par(mfrow=c(1,2))
plot(preds, which = "2", conf.int = TRUE)

plot(preds, which = "2", conf.int = TRUE, 
     fun = function (x) -log(x), ylab = "Cumulative Risk")

# overall true plot
sfit2 <- survfitJM(jointModel, newdata = pbc2[pbc2$id == "2", ], idVar = "id")
plot(sfit2, include.y = TRUE, conf.int = TRUE)

sfit7 <- survfitJM(jointModel, newdata = pbc2[pbc2$id == "7", ], idVar = "id")
plot(sfit7, include.y = TRUE, conf.int = TRUE)

Predicted plot using the first n observations

survPreds <- vector("list", nrow(ND))
for (i in 1:nrow(ND)) {
  survPreds[[i]] <- survfitJM(jointModel, newdata = ND[1:3, ])
}
ND[1:3, ]
##   id    years status      drug      age    sex      year ascites hepatomegaly
## 3  2 14.15234  alive D-penicil 56.44782 female 0.0000000      No          Yes
## 4  2 14.15234  alive D-penicil 56.44782 female 0.4983025      No          Yes
## 5  2 14.15234  alive D-penicil 56.44782 female 0.9993429      No          Yes
##   spiders    edema serBilir serChol albumin alkaline  SGOT platelets
## 3     Yes No edema      1.1     302    4.14     7395 113.5       221
## 4     Yes No edema      0.8      NA    3.60     2107 139.5       188
## 5     Yes No edema      1.0      NA    3.55     1711 144.2       161
##   prothrombin histologic status2
## 3        10.6          3       0
## 4        11.0          3       0
## 5        11.6          3       0
survPreds[[3]]
## 
## Prediction of Conditional Probabilities of Event
##  based on 200 Monte Carlo samples
## 
## $`2`
##      times   Mean Median  Lower  Upper
## 1   0.9993 1.0000 1.0000 1.0000 1.0000
## 1   1.3734 0.9959 0.9969 0.9867 0.9993
## 2   1.7938 0.9906 0.9933 0.9689 0.9985
## 3   2.2142 0.9844 0.9888 0.9462 0.9975
## 4   2.6346 0.9770 0.9835 0.9235 0.9965
## 5   3.0550 0.9687 0.9780 0.8911 0.9954
## 6   3.4754 0.9597 0.9723 0.8519 0.9944
## 7   3.8958 0.9502 0.9666 0.8161 0.9936
## 8   4.3162 0.9402 0.9608 0.7756 0.9929
## 9   4.7366 0.9295 0.9554 0.7299 0.9924
## 10  5.1570 0.9179 0.9496 0.6795 0.9918
## 11  5.5774 0.9048 0.9439 0.6202 0.9913
## 12  5.9978 0.8901 0.9377 0.5484 0.9908
## 13  6.4182 0.8736 0.9312 0.4928 0.9903
## 14  6.8386 0.8557 0.9226 0.4332 0.9897
## 15  7.2590 0.8365 0.9127 0.3594 0.9892
## 16  7.6794 0.8154 0.9008 0.2678 0.9889
## 17  8.0997 0.7913 0.8857 0.1706 0.9885
## 18  8.5201 0.7630 0.8675 0.0952 0.9881
## 19  8.9405 0.7299 0.8482 0.0348 0.9874
## 20  9.3609 0.6936 0.8209 0.0095 0.9864
## 21  9.7813 0.6577 0.7813 0.0013 0.9853
## 22 10.2017 0.6273 0.7465 0.0002 0.9850
## 23 10.6221 0.6054 0.7219 0.0000 0.9848
## 24 11.0425 0.5908 0.7013 0.0000 0.9846
## 25 11.4629 0.5809 0.6901 0.0000 0.9845
## 26 11.8833 0.5741 0.6836 0.0000 0.9844
## 27 12.3037 0.5683 0.6800 0.0000 0.9842
## 28 12.7241 0.5637 0.6768 0.0000 0.9841
## 29 13.1445 0.5589 0.6731 0.0000 0.9839
## 30 13.5649 0.5505 0.6658 0.0000 0.9837
## 31 13.9853 0.5292 0.6357 0.0000 0.9834
## 32 14.4057 0.4363 0.4527 0.0000 0.9808
par(mfrow = c(2, 2), oma = c(0, 2, 0, 2))
for (i in c(1,3,5,7)) {
  plot(survPreds[[i]], estimator = "median", conf.int = TRUE,
       include.y = TRUE, main = paste("Follow-up time:",
                                      round(survPreds[[i]]$last.time, 1)), ylab = "", ylab2 = "")
}
mtext("log serum bilirubin", side = 2, line = -1, outer = TRUE)
mtext("Survival Probability", side = 4, line = -1, outer = TRUE)

par(mfrow=c(1,1))
Competing Risks joint model
# data transform
# we first expand the PBC dataset in the competing risks long format
# with two competing risks being death and transplantation
pbc2.idCR <- crLong(pbc2.id, "status", "alive")  #alive is censoring, others are competing outcomes 
 # 1 dead
 # 0 crisk
# 0 dead
# 0 crisk
# we fit the linear mixed model as before
lmeFit.pbc <- lme(log(serBilir) ~ drug * ns(year, 3), 
                  random = list(id = pdDiag(form = ~ ns(year, 3))), data = pbc2)
 # lme(log(serBilir) ~ drug * year, random = ~ year | id, data = pbc2)
 
# however, for the survival model we need to use the data in the long
# format, and include the competing risks indicator as a stratification
# factor. We also take interactions of the baseline covariates with the
# stratification factor in order to allow the effect of these covariates
# to be different for each competing risk
coxCRFit.pbc <- coxph(Surv(years, status2) ~ (drug+sex  )*strata + strata(strata), 
                      data = pbc2.idCR, x = TRUE)
# the corresponding joint model is fitted simply by including the above
# two submodels as main arguments, setting argument CompRisk to TRUE, 
# and choosing as method = "spline-PH-aGH". Similarly as above, we also 
# include strata as an interaction factor to allow serum bilirubin to 
# have a different effect for each of the two competing risks
jmCRFit.pbc <- jointModel(lmeFit.pbc, coxCRFit.pbc, timeVar = "year", 
                          method = "spline-PH-aGH", 
                          interFact = list(value = ~ strata, data = pbc2.idCR), 
                          CompRisk = TRUE)
summary(jmCRFit.pbc)
## 
## Call:
## jointModel(lmeObject = lmeFit.pbc, survObject = coxCRFit.pbc, 
##     timeVar = "year", method = "spline-PH-aGH", interFact = list(value = ~strata, 
##         data = pbc2.idCR), CompRisk = TRUE)
## 
## Data Descriptives:
## Longitudinal Process     Event Process
## Number of Observations: 1945 Number of Events: 169 (54.2%)
## Number of Groups: 312
## 
## Joint Model Summary:
## Longitudinal Process: Linear mixed-effects model
## Event Process: Competing risks relative risk model with spline-approximated
##      baseline risk function
## Parameterization: Time-dependent 
## 
##    log.Lik      AIC      BIC
##  -1995.106 4064.213 4202.704
## 
## Variance Components:
##              StdDev
## (Intercept)  1.0017
## ns(year, 3)1 1.4857
## ns(year, 3)2 1.0767
## ns(year, 3)3 1.2696
## Residual     0.2895
## 
## Coefficients:
## Longitudinal Process
##                              Value Std.Err z-value p-value
## (Intercept)                 0.5829  0.0710  8.2088 <0.0001
## drugD-penicil              -0.1112  0.1013 -1.0973  0.2725
## ns(year, 3)1                0.8661  0.1688  5.1317 <0.0001
## ns(year, 3)2                1.5455  0.1436 10.7657 <0.0001
## ns(year, 3)3                1.4054  0.2239  6.2761 <0.0001
## drugD-penicil:ns(year, 3)1  0.2781  0.2342  1.1873  0.2351
## drugD-penicil:ns(year, 3)2 -0.4113  0.2041 -2.0150  0.0439
## drugD-penicil:ns(year, 3)3 -0.6545  0.3208 -2.0401  0.0413
## 
## Event Process
##                            Value Std.Err z-value p-value
## drugD-penicil            -0.4578  0.3848 -1.1896  0.2342
## sexfemale                 0.1889  0.5882  0.3211  0.7481
## drugD-penicil:stratadead  0.4565  0.4187  1.0903  0.2756
## sexfemale:stratadead     -0.5128  0.6255 -0.8197  0.4124
## Assoct                    1.1690  0.2023  5.7796 <0.0001
## Assoct:stratadead         0.1341  0.2231  0.6011  0.5478
## bs1(transplanted)        -9.5193  4.5778 -2.0794  0.0376
## bs2(transplanted)        -8.1512  2.3399 -3.4835  0.0005
## bs3(transplanted)        -4.3878  1.5504 -2.8301  0.0047
## bs4(transplanted)        -6.0914  1.1689 -5.2114 <0.0001
## bs5(transplanted)        -4.0520  1.1201 -3.6176  0.0003
## bs6(transplanted)        -6.1600  1.6258 -3.7889  0.0002
## bs7(transplanted)        -8.0608  5.0267 -1.6036  0.1088
## bs8(transplanted)        -7.8754  9.9272 -0.7933  0.4276
## bs9(transplanted)        -6.9720 10.9966 -0.6340  0.5261
## bs1(dead)                -3.8826  0.5398 -7.1926 <0.0001
## bs2(dead)                -4.7933  0.6567 -7.2995 <0.0001
## bs3(dead)                -3.7723  0.6524 -5.7821 <0.0001
## bs4(dead)                -4.5306  0.5670 -7.9902 <0.0001
## bs5(dead)                -4.1828  0.5942 -7.0390 <0.0001
## bs6(dead)                -4.3898  0.6561 -6.6907 <0.0001
## bs7(dead)                -1.7896  1.1384 -1.5720  0.1160
## bs8(dead)                -7.7312  2.1974 -3.5183  0.0004
## bs9(dead)                -2.4608  2.0415 -1.2054  0.2280
## 
## Integration:
## method: (pseudo) adaptive Gauss-Hermite
## quadrature points: 3 
## 
## Optimization:
## Convergence: 0
exp(confint(jmCRFit.pbc, parm = "Event"))
##                              2.5 %      est.   97.5 %
## drugD-penicil            0.2975978 0.6326842 1.345068
## sexfemale                0.3813485 1.2079056 3.825991
## drugD-penicil:stratadead 0.6947869 1.5785225 3.586328
## sexfemale:stratadead     0.1757358 0.5988409 2.040622
## Assoct                   2.1653587 3.2188271 4.784818
## Assoct:stratadead        0.7384635 1.1435115 1.770729
# https://stats.stackexchange.com/questions/427547/jointmodelbayes-output
# survfitJM() is not currently implemented for competing risks joint models.

Reference1 Reference2 Reference3