Packages

Installation

# No packages to install, so far

Loading

## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import data

abx <- read_csv("datafolder/bx.csv", col_select = -...1)
## New names:
## Rows: 12277 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (5): id, grade_group, mri, time_to_mri, age_at_bx dttm (3): bx_date, mri_date,
## last_data_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
apsa <- read_csv("datafolder/psa.csv", col_select = -...1)
## New names:
## Rows: 194195 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (3): id, psa_results, time_to_psa dttm (1): psa_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
abcr <- read_csv("datafolder/bcr.csv", col_select = -...1)
## New names:
## Rows: 2684 Columns: 19
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): after_tx dbl (16): id, grade_group, mri, time_to_tx, tx_to_bcr, tx_to_end,
## time_to_b... dttm (2): tx_date, bcr_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
atfs <- read_csv("datafolder/tfs.csv", col_select = -...1)
## New names:
## Rows: 12277 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): type_of_treatment dbl (7): id, grade_group, mri, tfs_status, tfs_time,
## tfs5_status, tfs5_time dttm (1): treatment_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Descriptive stat

Biopsy

summary(abx)
##        id           bx_date                        grade_group   
##  Min.   :    1   Min.   :1993-09-09 00:00:00.00   Min.   :1.000  
##  1st Qu.: 3129   1st Qu.:2008-11-14 00:00:00.00   1st Qu.:1.000  
##  Median : 6265   Median :2012-11-27 00:00:00.00   Median :2.000  
##  Mean   : 6269   Mean   :2012-02-01 22:53:43.77   Mean   :2.495  
##  3rd Qu.: 9409   3rd Qu.:2016-03-31 00:00:00.00   3rd Qu.:3.000  
##  Max.   :12545   Max.   :2019-06-13 00:00:00.00   Max.   :5.000  
##                                                                  
##     mri_date                           mri          time_to_mri     
##  Min.   :1999-08-10 00:00:00.00   Min.   :0.0000   Min.   :-12.000  
##  1st Qu.:2016-08-03 00:00:00.00   1st Qu.:0.0000   1st Qu.: -1.000  
##  Median :2017-09-12 00:00:00.00   Median :0.0000   Median : -1.000  
##  Mean   :2017-05-01 01:55:21.68   Mean   :0.1454   Mean   : -1.168  
##  3rd Qu.:2018-08-09 00:00:00.00   3rd Qu.:0.0000   3rd Qu.: -1.000  
##  Max.   :2019-06-07 00:00:00.00   Max.   :1.0000   Max.   : 28.000  
##  NA's   :10492                                     NA's   :10492    
##    age_at_bx     last_data_date                  
##  Min.   :37.00   Min.   :1994-11-07 00:00:00.00  
##  1st Qu.:63.00   1st Qu.:2017-02-20 13:37:00.00  
##  Median :68.00   Median :2020-09-14 09:12:00.00  
##  Mean   :67.71   Mean   :2018-05-02 13:55:13.81  
##  3rd Qu.:73.00   3rd Qu.:2021-01-20 11:19:00.00  
##  Max.   :80.00   Max.   :2021-05-05 00:00:00.00  
## 
# Bar plot for grade_group
barplot(table(abx$grade_group), main = "Distribution of grade_group", xlab = "Grade Group", ylab = "Count")

# Box plot for time_to_mri
boxplot(abx$time_to_mri, main = "Distribution of time_to_mri", ylab = "Time to MRI")

# Histogram for age_at_bx
hist(abx$age_at_bx, main = "Distribution of Age at Biopsy", xlab = "Age", ylab = "Frequency")

# Histogram for mri
hist(abx$mri, main = "Distribution of MRI", xlab = "MRI", ylab = "Frequency")

# Convert datetime variables to Date format
abx$bx_date <- as.Date(abx$bx_date)
abx$mri_date <- as.Date(abx$mri_date)

# Plot bx_date
ggplot(abx, aes(x = bx_date)) + 
  geom_histogram(binwidth = 30, color = "black", fill = "blue") + 
  labs(title = "Distribution of bx_date", x = "Date", y = "Frequency")

# Plot mri_date
ggplot(abx, aes(x = mri_date)) + 
  geom_histogram(binwidth = 30, color = "black", fill = "green") + 
  labs(title = "Distribution of mri_date", x = "Date", y = "Frequency")
## Warning: Removed 10492 rows containing non-finite values (`stat_bin()`).

PSA

summary(apsa)
##        id         psa_results          psa_date                     
##  Min.   :    1   Min.   :    0.00   Min.   :1998-05-26 07:01:00.00  
##  1st Qu.: 3031   1st Qu.:    0.08   1st Qu.:2011-01-03 11:56:00.00  
##  Median : 6098   Median :    1.10   Median :2015-01-21 10:17:00.00  
##  Mean   : 6096   Mean   :   14.77   Mean   :2014-04-03 21:36:56.91  
##  3rd Qu.: 9119   3rd Qu.:    5.80   3rd Qu.:2018-03-13 07:45:00.00  
##  Max.   :12227   Max.   :76800.00   Max.   :2021-04-08 08:09:00.00  
##   time_to_psa     
##  Min.   :-244.00  
##  1st Qu.:   4.00  
##  Median :  25.00  
##  Mean   :  30.67  
##  3rd Qu.:  60.00  
##  Max.   : 284.00
# Convert datetime variable to POSIXct format
apsa$psa_date <- as.POSIXct(apsa$psa_date)

# Plot frequency of psa_date
apsa$year <- format(apsa$psa_date, "%Y")
hist(as.numeric(apsa$year), main = "Frequency of PSA tests", xlab = "Date", ylab = "Frequency", breaks = 30)

# Apply log transformation to psa_results
apsa$log_psa_results <- log(apsa$psa_results + 1)
hist(as.numeric(apsa$log_psa_results)[apsa$time_to_psa==0], main = "Dist LOG PSA at baseline (ref)", xlab = "log(PSA+1)", ylab = "Frequency", breaks = 30)

# Calculate the average log_psa_result
average_log_psa <- mean(apsa$log_psa_results[apsa$time_to_psa == 0])

# Center the log_psa_result around its average
centered_log_psa <- apsa$log_psa_results - average_log_psa

# Plot the histogram
hist(centered_log_psa, main = "Centered Dist LOG PSA at baseline", xlab = "log(PSA+1) - avg", ylab = "Frequency", breaks = 30)

hist(as.numeric(apsa$psa_results)[apsa$time_to_psa==0 & apsa$psa_results<100], main = "Dist PSA at baseline", xlab = "PSA < 100 at baseline", ylab = "Frequency", breaks = 10)

# Plot histogram for Time to PSA
hist(apsa$time_to_psa, main = "Distribution of Time to PSA", xlab = "Time to PSA", ylab = "Frequency")

# Plot histogram for + Time to PSA
hist(apsa$time_to_psa[apsa$time_to_psa>=0], main = "Distribution of +Time to PSA", xlab = "+Time to PSA", ylab = "Frequency")

# Plot histogram for + Time to PSA
hist(log(apsa$time_to_psa+1)[apsa$time_to_psa>=0], main = "Distribution of LOG +Time to PSA", xlab = "+Time to PSA", ylab = "Frequency")
## Warning in log(apsa$time_to_psa + 1): NaNs produced

Spaghetti Plots

# Plot log(psa_results + 0.1) against time_to_psa as spaghetti plot
matplot(apsa$time_to_psa, apsa$log_psa_results, type = "l", main = "Spaghetti Plot of Log Transformed PSA Results Over Time", xlab = "Time to psa", ylab = "Log(PSA + 0.1)", col = 1:nrow(apsa), lty = 1)

# Plot log(psa_results + 0.1) against time_to_psa>0 as spaghetti plot
pos_apsa <- apsa[apsa$time_to_psa>0,]
matplot(pos_apsa$time_to_psa, pos_apsa$log_psa_results, type = "l", main = "Spaghetti Plot of Log Post-Bx Transformed PSA Results Over Time", xlab = "Time to psa", ylab = "Log(PSA + 0.1)", col = 1:nrow(pos_apsa), lty = 1)

# ggplot2, nicer plots
meta_apsa <- merge(pos_apsa,
                   abx[, c('id', 'grade_group', 'mri','age_at_bx')], on='id', all=TRUE)
p <- ggplot(data = meta_apsa, aes(x = time_to_psa, y = log_psa_results, group = id, colour=grade_group))

# all
p + 
  geom_line() + 
  stat_smooth(aes(group = 1), color='red') + 
  stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3)
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 458 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 458 rows containing missing values (`geom_line()`).

# By GG
p + 
  geom_line() + 
  stat_smooth(aes(group = 1), color='red') + 
  #stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3, color='yellow')+ 
  facet_grid(. ~ grade_group)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Removed 458 rows containing missing values (`geom_line()`).

p + 
  geom_line() + 
  stat_smooth(aes(group = 1), color='red') + 
  #stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3, color='yellow')+ 
  facet_grid(. ~ mri)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Removed 458 rows containing missing values (`geom_line()`).

Biochemical recurrence (BCR)

summary(abcr)
##        id         grade_group         mri           after_tx        
##  Min.   :    2   Min.   :1.000   Min.   :0.0000   Length:2684       
##  1st Qu.: 2921   1st Qu.:2.000   1st Qu.:0.0000   Class :character  
##  Median : 6002   Median :3.000   Median :0.0000   Mode  :character  
##  Mean   : 5989   Mean   :2.865   Mean   :0.1073                     
##  3rd Qu.: 8993   3rd Qu.:4.000   3rd Qu.:0.0000                     
##  Max.   :12224   Max.   :5.000   Max.   :1.0000                     
##     tx_date                         time_to_tx       tx_to_bcr     
##  Min.   :1999-09-06 08:44:00.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:2009-09-09 18:00:00.00   1st Qu.:  3.00   1st Qu.: 16.00  
##  Median :2012-11-30 00:00:00.00   Median :  4.00   Median : 34.00  
##  Mean   :2012-07-06 19:29:22.77   Mean   : 11.91   Mean   : 42.72  
##  3rd Qu.:2015-11-16 06:00:00.00   3rd Qu.:  7.00   3rd Qu.: 60.00  
##  Max.   :2020-04-16 00:00:00.00   Max.   :177.00   Max.   :194.00  
##    tx_to_end         bcr_date                       time_to_bcr    
##  Min.   :  2.00   Min.   :2000-07-24 08:00:00.00   Min.   :  2.00  
##  1st Qu.: 41.00   1st Qu.:2012-12-18 07:43:30.00   1st Qu.: 24.00  
##  Median : 69.00   Median :2016-01-17 00:00:00.00   Median : 44.00  
##  Mean   : 75.92   Mean   :2015-06-15 20:45:10.81   Mean   : 54.63  
##  3rd Qu.:107.00   3rd Qu.:2018-07-30 20:15:45.00   3rd Qu.: 74.00  
##  Max.   :238.00   Max.   :2021-04-07 11:10:00.00   Max.   :226.00  
##   time_to_end       rfs_status    rfs_time       drfs_status   drfs_time     
##  Min.   :  3.00   Min.   :1    Min.   :  0.00   Min.   :1    Min.   :  2.00  
##  1st Qu.: 50.00   1st Qu.:1    1st Qu.: 16.00   1st Qu.:1    1st Qu.: 24.00  
##  Median : 80.00   Median :1    Median : 34.00   Median :1    Median : 44.00  
##  Mean   : 87.83   Mean   :1    Mean   : 42.72   Mean   :1    Mean   : 54.63  
##  3rd Qu.:120.00   3rd Qu.:1    3rd Qu.: 60.00   3rd Qu.:1    3rd Qu.: 74.00  
##  Max.   :265.00   Max.   :1    Max.   :194.00   Max.   :1    Max.   :226.00  
##   rfs5_status       rfs5_time      drfs5_status      drfs5_time   
##  Min.   :0.0000   Min.   : 0.00   Min.   :0.0000   Min.   : 2.00  
##  1st Qu.:1.0000   1st Qu.:16.00   1st Qu.:0.0000   1st Qu.:24.00  
##  Median :1.0000   Median :34.00   Median :1.0000   Median :44.00  
##  Mean   :0.7511   Mean   :34.78   Mean   :0.6505   Mean   :41.06  
##  3rd Qu.:1.0000   3rd Qu.:60.00   3rd Qu.:1.0000   3rd Qu.:60.00  
##  Max.   :1.0000   Max.   :60.00   Max.   :1.0000   Max.   :60.00
barplot(table(abcr$grade_group), main = "Distribution of grade_group", xlab = "Grade Group", ylab = "Count")

table(abcr$grade_group)
## 
##   1   2   3   4   5 
## 606 555 638 365 520

Treatment

summary(atfs)
##        id         grade_group         mri        
##  Min.   :    1   Min.   :1.000   Min.   :0.0000  
##  1st Qu.: 3129   1st Qu.:1.000   1st Qu.:0.0000  
##  Median : 6265   Median :2.000   Median :0.0000  
##  Mean   : 6269   Mean   :2.495   Mean   :0.1454  
##  3rd Qu.: 9409   3rd Qu.:3.000   3rd Qu.:0.0000  
##  Max.   :12545   Max.   :5.000   Max.   :1.0000  
##                                                  
##  treatment_date                   type_of_treatment    tfs_status    
##  Min.   :1999-06-07 11:43:00.00   Length:12277       Min.   :0.0000  
##  1st Qu.:2010-10-26 00:00:00.00   Class :character   1st Qu.:1.0000  
##  Median :2014-02-20 00:00:00.00   Mode  :character   Median :1.0000  
##  Mean   :2013-08-09 03:45:34.11                      Mean   :0.7764  
##  3rd Qu.:2016-12-21 00:00:00.00                      3rd Qu.:1.0000  
##  Max.   :2021-04-29 00:00:00.00                      Max.   :1.0000  
##  NA's   :2745                                                        
##     tfs_time       tfs5_status       tfs5_time   
##  Min.   :  0.00   Min.   :0.0000   Min.   : 0.0  
##  1st Qu.:  2.00   1st Qu.:0.0000   1st Qu.: 2.0  
##  Median :  4.00   Median :1.0000   Median : 4.0  
##  Mean   : 24.66   Mean   :0.7327   Mean   :16.4  
##  3rd Qu.: 25.00   3rd Qu.:1.0000   3rd Qu.:25.0  
##  Max.   :259.00   Max.   :1.0000   Max.   :60.0  
## 
table(atfs$grade_group)
## 
##    1    2    3    4    5 
## 3652 3313 2503 1195 1614
table(atfs$mri)
## 
##     0     1 
## 10492  1785

Modelling

Import INLA and INLAjoint

library(INLA)
## Loading required package: Matrix
## Loading required package: sp
## This is INLA_24.02.09 built 2024-02-09 03:43:24 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - List available models/likelihoods/etc with inla.list.models()
##  - Use inla.doc(<NAME>) to access documentation
#inla.update(testing=TRUE)
library(INLAjoint)
## Package 'INLAjoint' version 24.2.5
## Type 'citation("INLAjoint")' for citing this R package in publications.

Data prepration

# First I have to combine (back) the tfs and bcr data in one dataframe
surv_data <- merge(abcr[, c('id', 'drfs_status', 'drfs_time', 'after_tx')],
                   abx[, c('id', 'grade_group', 'mri','age_at_bx')], on='id', all=TRUE)

surv_data <- merge(surv_data,
                   atfs[, c('id', 'type_of_treatment', 'tfs_status', 'tfs_time')], on='id', all=TRUE)
# New ID
#surv_data$id <- 1:nrow(surv_data)

# Re-Order
#surv_data <- surv_data[, c(6, 5, 4, 3, 2)]

# Remove where both time to treatment/bcr = 0 and event = 0 (this should be update when converting time to days)
all_null_tfs <- surv_data[surv_data$tfs_time==0 & surv_data$tfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs$id), ]
all_null_drfs <- surv_data[surv_data$drfs_time==0 & surv_data$drfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_drfs$id), ]

# Now if no BCR data then rfs status = 0 and rfs time = tfs time [to fill out the NULL cells]
surv_data$drfs_status <- ifelse(is.na(surv_data$drfs_status), 0, surv_data$drfs_status)
surv_data$drfs_time <- ifelse(is.na(surv_data$drfs_time), surv_data$tfs_time, surv_data$drfs_time)

# Remove patients when time to tx/bcr = 0
all_null_tfs_drfs <- surv_data[surv_data$tfs_time==0 & surv_data$drfs_time==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs_drfs$id), ]

# if the tfs_status = 1 and after tx ????

M-1: Treatment risk

M1 <- joint(formSurv =list(inla.surv(time = tfs_time, event = tfs_status) ~ -1),
            basRisk = c("rw2"),  
            dataSurv = list(surv_data))
summary(M1)
## 
## Survival outcome
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 1.4005 0.4296     0.7507    1.335     2.4293
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -25069.39                             -25069.39 
## 
## Deviance Information Criterion:  50050.01
## Widely applicable Bayesian information criterion:  50050.38
## Computation time: 1.32 seconds
plot(M1)
## $Baseline

## 
## attr(,"class")
## [1] "plot.INLAjoint" "list"

M-2: BCR risk

M2 <- joint(formSurv = list(inla.surv(time = drfs_time, event = drfs_status) ~ -1),
            basRisk = c("rw2"),
            dataSurv = list(surv_data))
summary(M2)
## 
## Survival outcome
##                           mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.375 0.1596     0.1562    0.344      0.776
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -11180.02                             -11180.02 
## 
## Deviance Information Criterion:  22294.39
## Widely applicable Bayesian information criterion:  22294.2
## Computation time: 1.32 seconds
plot(M2)
## $Baseline

## 
## attr(,"class")
## [1] "plot.INLAjoint" "list"
plot(M2)$Baseline+scale_y_log10()

JM-1: Treatment-BCR joint risk model

JM1 <- joint(formSurv=list(inla.surv(time = tfs_time, event = tfs_status) ~ -1+(1|id),
                           inla.surv(time = drfs_time, event = drfs_status) ~ -1), id="id",
             basRisk=c("rw2", "rw2"), assocSurv=TRUE, NbasRisk = 30,
             
             dataSurv = list(surv_data))
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Max id is 12541 but there are only 11577 individuals with longitudinal
## records, I'm reassigning ids
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : The hyperparameters skewness correction seems abnormal, this can be a sign
## of an ill-defined model and/or issues with the fit.
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Stupid local search strategy used: This can be a sign of a ill-defined
## model and/or non-informative data.
summary(JM1)
## 
## Survival outcome (S1)
##                              mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 2.004 0.5085     1.1858   1.9437     3.1764
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 0.9765 0.1155     0.7769   0.9666     1.2302
## 
## Survival outcome (S2)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 1.3623 0.3328     0.8618   1.3083      2.162
## 
## Association survival - survival
##                     mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 3.7982 0.2264     3.3352    3.804     4.2261
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -36738.35                             -36738.35 
## 
## Deviance Information Criterion:  74905.69
## Widely applicable Bayesian information criterion:  31895535
## Computation time: 23.84 seconds
plot(JM1)$Baseline+scale_y_log10()

# Grade group as a factor
surv_data$grade_group <- as.factor(surv_data$grade_group)

# MRI as a factor
surv_data$mri <- as.factor(surv_data$mri)

# drfs status and tfs_statusas a factor
surv_data$drfs_status <- as.factor(surv_data$drfs_status)
surv_data$tfs_status <- as.factor(surv_data$tfs_status)

# Classify 'Age' to: a<=65 , 65<a<=70 , 70>a
# Create a new column class_age_at_bx as a factor with 3 levels
surv_data$class_age_at_bx <- cut(surv_data$age_at_bx, 
                                 breaks = c(-Inf, 60, 70, Inf), 
                                 labels = c("<60", "60-70", ">70"),
                                 include.lowest = TRUE)

# Print a summary of the new attribute
table(surv_data$class_age_at_bx)
## 
##   <60 60-70   >70 
##  2056  5096  4425
# Summary of the data
summary(surv_data)
##        id        drfs_status   drfs_time        after_tx         grade_group
##  Min.   :    1   0:8893      Min.   :  1.00   Length:11577       1:3513     
##  1st Qu.: 3054   1:2684      1st Qu.:  3.00   Class :character   2:3182     
##  Median : 6124               Median : 14.00   Mode  :character   3:2397     
##  Mean   : 6121               Mean   : 36.13                      4:1116     
##  3rd Qu.: 9178               3rd Qu.: 52.00                      5:1369     
##  Max.   :12541               Max.   :259.00                                 
##  mri        age_at_bx     type_of_treatment  tfs_status    tfs_time     
##  0:9858   Min.   :39.00   Length:11577       0:2342     Min.   :  0.00  
##  1:1719   1st Qu.:63.00   Class :character   1:9235     1st Qu.:  2.00  
##           Median :68.00   Mode  :character              Median :  4.00  
##           Mean   :67.61                                 Mean   : 26.15  
##           3rd Qu.:73.00                                 3rd Qu.: 28.00  
##           Max.   :80.00                                 Max.   :259.00  
##  class_age_at_bx
##  <60  :2056     
##  60-70:5096     
##  >70  :4425     
##                 
##                 
## 

JM-2: Account for: all

JM2 <- joint(formSurv=list(inla.surv(time = tfs_time, event = tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx+(1|id),
                           inla.surv(time = drfs_time, event = drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx), id="id",
             basRisk=c("rw2", "rw2"), assocSurv=TRUE, NbasRisk = 30,
             
             dataSurv = list(surv_data))
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Max id is 12541 but there are only 11577 individuals with longitudinal
## records, I'm reassigning ids
summary(JM2)
## 
## Survival outcome (S1)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 0.3945 0.0364     0.3440   0.3876     0.4825
## grade_group2_S1             1.3541 0.0558     1.2446   1.3541     1.4636
## grade_group3_S1             1.8129 0.0610     1.6934   1.8129     1.9325
## grade_group4_S1             1.7034 0.0785     1.5494   1.7034     1.8574
## grade_group5_S1             2.3105 0.0741     2.1653   2.3104     2.4558
## mri1_S1                     0.9038 0.0595     0.7871   0.9038     1.0206
## class_age_at_bx6070_S1      0.3481 0.0590     0.2324   0.3481     0.4637
## class_age_at_bx70_S1        0.5702 0.0609     0.4507   0.5702     0.6897
## after_txrt_S1               0.4825 0.0584     0.3680   0.4825     0.5971
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 4.4795 0.0958     4.3034   4.4749     4.6791
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.3576 0.0270     0.3143   0.3541     0.4192
## grade_group2_S2              1.5567 0.0766     1.4067   1.5567     1.7070
## grade_group3_S2              1.9750 0.0837     1.8111   1.9750     2.1392
## grade_group4_S2              1.9112 0.1076     1.7003   1.9112     2.1224
## grade_group5_S2              2.6799 0.1019     2.4803   2.6798     2.8799
## mri1_S2                      1.4163 0.0819     1.2557   1.4162     1.5771
## class_age_at_bx6070_S2       0.4339 0.0807     0.2757   0.4339     0.5923
## class_age_at_bx70_S2         0.8903 0.0834     0.7267   0.8902     1.0540
## after_txrt_S2               -2.4713 0.0815    -2.6314  -2.4712    -2.3116
## 
## Association survival - survival
##                     mean   sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.3782 0.02     1.3359   1.3791     1.4144
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                              -81382.8                              -81382.8 
## 
## Deviance Information Criterion:  141085
## Widely applicable Bayesian information criterion:  142542.4
## Computation time: 20.3 seconds
plot(JM2)$Baseline+scale_y_log10()

Note:

  • This plots are not so affirmative the way it is presented, we can transform them to the survival to make more sense.

  • The longer time to treatment the longer time to relapse, which is expected as this data include several effecting covariates (Confounder), such as age and Gleason grade group.

V2: To survival Curves

M1_surv <- joint(formSurv=list(inla.surv(time =tfs_time, event = tfs_status) ~ -1),
             basRisk=c("rw2"),  dataSurv = surv_data)
summary(M1_surv)
## 
## Survival outcome
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 1.1258 0.3376     0.6126   1.0751     1.9322
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -48079.76                             -48079.76 
## 
## Deviance Information Criterion:  96063.18
## Widely applicable Bayesian information criterion:  96069.51
## Computation time: 1.25 seconds
#plot(M1_surv)
onePatient <- surv_data[1, ]
P <- predict(M1_surv, onePatient, id="id", horizon=300, surv=TRUE)$PredS
## Warning in predict.INLAjoint(M1_surv, onePatient, id = "id", horizon = 300, :
## The fitted model has baseline risk information up until value 259 for survival
## outcome 1. Since you ask for prediction at horizon 300 I will assume constant
## baseline hazard beyond the maximum available value. Alternatively, you can use
## baselineHaz='smooth' to use splines to predict the baseline hazard (for each
## sample). Alternatively, adding 'horizon' in the control options of the inla()
## call allows to extend the baseline beyond the last observed event time (linear
## extension, less flexible than the smooth method).
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 1 on PID: 31741
## Computing survival predictions for individual 1
# plot survival curve for the two time to event outcomes
plot(P$time, P$Surv_quant0.5, type="l", lwd=2, ylim=c(0, 1), xlab="time", ylab="survival probability")
lines(P$time, P$Surv_quant0.025, lty=2)
lines(P$time, P$Surv_quant0.975, lty=2)
# add observed event times
# sapply(surv_data[surv_data$tfs_status==1, "tfs_time"], function(x) abline(v=x, lty=3, lwd=0.5))

M2_surv <- joint(formSurv=list(inla.surv(time = drfs_time, event = drfs_status) ~ -1),
            basRisk=c("rw2"),  dataSurv = surv_data)
summary(M2_surv)
## 
## Survival outcome
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.4468 0.1685     0.2063   0.4168     0.8625
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -41586.44                             -41586.44 
## 
## Deviance Information Criterion:  83092.81
## Widely applicable Bayesian information criterion:  83097.62
## Computation time: 1.3 seconds
#plot(M2_surv)
P2 <- predict(M2_surv, onePatient, id="id", horizon=300, surv=TRUE)$PredS
## Warning in predict.INLAjoint(M2_surv, onePatient, id = "id", horizon = 300, :
## The fitted model has baseline risk information up until value 259 for survival
## outcome 1. Since you ask for prediction at horizon 300 I will assume constant
## baseline hazard beyond the maximum available value. Alternatively, you can use
## baselineHaz='smooth' to use splines to predict the baseline hazard (for each
## sample). Alternatively, adding 'horizon' in the control options of the inla()
## call allows to extend the baseline beyond the last observed event time (linear
## extension, less flexible than the smooth method).
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 1 on PID: 31741
## Computing survival predictions for individual 1
lines(P2$time, P2$Surv_quant0.5, lwd=2, col=2)
lines(P2$time, P2$Surv_quant0.025, lty=2, col=2)
lines(P2$time, P2$Surv_quant0.975, lty=2, col=2)
# sapply(surv_data[surv_data$drfs_time==1, "drfs_time"], function(x) abline(v=x, lty=3, lwd=0.5, col=2))
legend("topright", c("Treatment", "Relapse"), lwd=2, col=1:2)

V3: Including longitudinal data

Prepare data: Merging

# Add Longitudinal data (PSA) to the surv_Data
surv_long_data <- merge(surv_data, apsa[, c('id', 'time_to_psa', 'psa_results','log_psa_results')], by='id')

# Remove NaNs from treatment related cols
# Replace NA values with 'unk'
surv_long_data$after_tx <- ifelse(is.na(surv_long_data$after_tx), 'unk', surv_long_data$after_tx)
surv_long_data$type_of_treatment <- ifelse(is.na(surv_long_data$type_of_treatment), 'unk', surv_long_data$type_of_treatment)

# Convert to factor
surv_long_data$after_tx <- factor(surv_long_data$after_tx)
surv_long_data$type_of_treatment <- factor(surv_long_data$type_of_treatment)

# Uncomment when needed, Sampling
unique_ids <- unique(surv_long_data$id)
sample_size <- round(0.01 * length(unique_ids))

# Sample X.% of the unique IDs
sampled_ids <- sample(unique_ids, sample_size, replace = FALSE)

# Subset the data based on sampled IDs
surv_long_data <- surv_long_data[surv_long_data$id %in% c(sampled_ids, 25), ]

# Reassigning ID
# -> Calculate run-length encoding of the original id column
id_rle <- rle(surv_long_data$id)

# -> Generate a sequence of IDs based on the lengths of runs in the original id column
new_id <- rep(seq_along(id_rle$lengths), id_rle$lengths)

# -> Reassign the new IDs to the id column in surv_long_data
surv_long_data$id <- new_id

# Surv data to be used in the latter join model 
surv_nolong_data <- surv_long_data[!duplicated(surv_long_data$id), ]

summary(surv_long_data)
##        id         drfs_status   drfs_time      after_tx   grade_group mri     
##  Min.   :  1.00   0:1321      Min.   :  1.00   rp : 150   1:604       0:1364  
##  1st Qu.: 29.00   1: 398      1st Qu.:  4.00   rt : 248   2:541       1: 355  
##  Median : 58.00               Median : 18.00   unk:1321   3:314               
##  Mean   : 57.39               Mean   : 40.42              4:128               
##  3rd Qu.: 86.00               3rd Qu.: 54.00              5:132               
##  Max.   :117.00               Max.   :231.00                                  
##    age_at_bx     type_of_treatment tfs_status    tfs_time      class_age_at_bx
##  Min.   :49.00   chemo:  0         0: 373     Min.   :  0.00   <60  :311      
##  1st Qu.:62.00   horm :335         1:1346     1st Qu.:  3.00   60-70:873      
##  Median :67.00   radio:556                    Median :  5.00   >70  :535      
##  Mean   :66.91   rp   :455                    Mean   : 33.24                  
##  3rd Qu.:72.00   unk  :373                    3rd Qu.: 40.00                  
##  Max.   :80.00                                Max.   :231.00                  
##   time_to_psa       psa_results       log_psa_results  
##  Min.   :-156.00   Min.   :   0.000   Min.   :0.00000  
##  1st Qu.:   4.00   1st Qu.:   0.090   1st Qu.:0.08618  
##  Median :  24.00   Median :   1.160   Median :0.77011  
##  Mean   :  31.07   Mean   :  13.614   Mean   :1.12239  
##  3rd Qu.:  57.00   3rd Qu.:   5.955   3rd Qu.:1.93946  
##  Max.   : 231.00   Max.   :3260.000   Max.   :8.08979

Modelling : all PSAs (-/+)

library(splines)

# We applied log transformation to PSA with shift=1

# Longitudinal Data should be sorted for each patient.
surv_long_data <- surv_long_data[with(surv_long_data, order(id, time_to_psa)), ]

# First joint model for longitudinal marker PSA
LM1 <- joint(formLong=log_psa_results ~ time_to_psa + (1 + time_to_psa | id), 
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), # To make calculation faster, use only the mean for the hyper-parameter distributions
             dataLong=surv_long_data
             )
summary(LM1)
## Longitudinal outcome (gaussian)
##                         mean     sd 0.025quant 0.5quant 0.975quant
## Intercept             1.3393 0.0734     1.1954   1.3393     1.4832
## time_to_psa          -0.0147 0.0089    -0.0322  -0.0147     0.0029
## Res. err. (variance)  0.4463 0.0162     0.4155   0.4460     0.4791
## 
## Random effects variance-covariance (L1)
##                          mean     sd 0.025quant 0.5quant 0.975quant
## Intercept              0.5571 0.0821     0.4157   0.5502     0.7336
## time_to_psa            0.0092 0.0012     0.0071   0.0091     0.0118
## Intercept:time_to_psa -0.0013 0.0071    -0.0158  -0.0007     0.0119
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -2252.920                             -2248.957 
## 
## Deviance Information Criterion:  3686.216
## Widely applicable Bayesian information criterion:  3743.282
## Computation time: 1.33 seconds
# 643:42 , 1030:41 , 321:37 , 553:37 ,  938:36 ,  444:36 , 405:33 -> these are some patients with large enough data
patientID = 25 # pick one
patient_counts <- table(surv_long_data$id)
max_count <- max(patient_counts)
patients_with_max_count <- names(patient_counts[patient_counts == max_count])
patientID = patients_with_max_count

ND <- surv_long_data[surv_long_data$id==patientID,]
P1 <- predict(LM1, ND, id="id", horizon=max(surv_long_data$time_to_psa))$PredL # Make Linear prediction
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
# To plot, should run from here
plot(P1$time_to_psa, P1$quant0.5, type="l", lwd=2, ylim=range(c(P1$quant0.025, P1$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P1$time_to_psa, P1$quant0.025, lty=2)
lines(P1$time_to_psa, P1$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19) # to here , pch=size of the dots

# use splines?
NSplines <- ns(surv_long_data$time_to_psa, knots=c(25)) # natural cubic splines , knots, the starting rise time, look at time to psa summary
f1 <- function(x) predict(NSplines, x)[,1] # first basis
f2 <- function(x) predict(NSplines, x)[,2] # second basis
# check splines
curve(f1, xlim=range(surv_long_data$time_to_psa), ylim=c(-1,1))
curve(f2, xlim=range(surv_long_data$time_to_psa), add=T)

# Second joint model for the longitudinal data, this time accounting for all
LM2 <- joint(formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), 
             dataLong=surv_long_data
            )
summary(LM2)
## Longitudinal outcome (gaussian)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Intercept                   1.2115 0.4835     0.2639   1.2115     2.1592
## f1time_to_psa              -2.4374 0.7851    -3.9762  -2.4374    -0.8986
## f2time_to_psa              -4.6243 1.8254    -8.2021  -4.6243    -1.0465
## grade_group2                0.7210 0.5707    -0.3976   0.7210     1.8396
## grade_group3               -0.9281 0.6808    -2.2624  -0.9281     0.4063
## grade_group4                1.4141 0.9685    -0.4842   1.4141     3.3124
## grade_group5               -0.4363 0.8244    -2.0521  -0.4363     1.1796
## mri1                       -0.1095 0.1365    -0.3770  -0.1095     0.1580
## class_age_at_bx6070         0.2621 0.1779    -0.0866   0.2621     0.6107
## class_age_at_bx70           0.5424 0.1857     0.1784   0.5424     0.9064
## after_txrt                  0.8043 0.2904     0.2351   0.8043     1.3734
## after_txunk                 0.1688 0.2218    -0.2660   0.1688     0.6036
## f1time_to_psa:grade_group2 -3.7616 1.0947    -5.9071  -3.7616    -1.6161
## f1time_to_psa:grade_group3 -1.3502 1.2783    -3.8557  -1.3502     1.1553
## f1time_to_psa:grade_group4 -2.3859 1.8599    -6.0312  -2.3859     1.2594
## f1time_to_psa:grade_group5  1.2357 1.5673    -1.8362   1.2357     4.3076
## f2time_to_psa:grade_group2 -3.1997 2.7304    -8.5512  -3.1997     2.1518
## f2time_to_psa:grade_group3 -5.2452 3.2391   -11.5937  -5.2452     1.1033
## f2time_to_psa:grade_group4  2.2777 4.3371    -6.2229   2.2777    10.7783
## f2time_to_psa:grade_group5  1.7360 4.3266    -6.7440   1.7360    10.2159
## Res. err. (variance)        0.4028 0.0158     0.3722   0.4027     0.4341
## 
## Random effects variance-covariance (L1)
##                                 mean      sd 0.025quant 0.5quant 0.975quant
## Intercept                     3.9518  3.5905     1.1649   2.9877    12.1458
## f1time_to_psa                13.2831  3.8321     7.6596  12.7082    22.4594
## f2time_to_psa               134.9731 28.0316    87.8352 132.3635   200.5550
## Intercept:f1time_to_psa       0.4751  3.2939    -7.6999   1.2702     3.7994
## Intercept:f2time_to_psa      18.1205  7.5477     8.1619  16.7820    36.4451
## f1time_to_psa:f2time_to_psa  26.7543  9.9779     5.2259  27.0630    45.1522
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -2108.808                             -2102.088 
## 
## Deviance Information Criterion:  3487.443
## Widely applicable Bayesian information criterion:  3555.614
## Computation time: 2.16 seconds
# Again, pick one patient (here the same)
ND2 <- surv_long_data[surv_long_data$id==patientID,] # observed vs. fitted for a couple individuals.
P2 <- predict(LM2, ND, id="id", horizon=max(surv_long_data$time_to_psa))$PredL
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
plot(P2$time_to_psa, P2$quant0.5, type="l", lwd=2, ylim=range(c(P2$quant0.025, P2$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P2$time_to_psa, P2$quant0.025, lty=2)
lines(P2$time_to_psa, P2$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19)

#
inla.setOption(num.threads='8:1')
# Third joint model, combinning 2 survival modals and the longitudinal one 
LJM1 <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx  + (1|id),
                          inla.surv(drfs_time, drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx),
            formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
            id="id",
            timeVar="time_to_psa",
            basRisk=c("rw2", "rw2"), # random walk
            assocSurv=TRUE,
            family="gaussian", 
            assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
            control=list(int.strategy="eb"), 
            dataSurv=list(surv_nolong_data), 
            dataLong=surv_long_data
            )
summary(LJM1)
## Longitudinal outcome (gaussian)
##                                  mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1                   1.2444 0.4767     0.3102   1.2444     2.1787
## f1time_to_psa_L1              -2.4534 0.7825    -3.9871  -2.4534    -0.9197
## f2time_to_psa_L1              -4.5632 1.7988    -8.0887  -4.5632    -1.0376
## grade_group2_L1                0.6752 0.5605    -0.4233   0.6752     1.7738
## grade_group3_L1               -0.9331 0.6703    -2.2469  -0.9331     0.3808
## grade_group4_L1                1.3646 0.9564    -0.5098   1.3646     3.2390
## grade_group5_L1               -0.4683 0.8111    -2.0581  -0.4683     1.1214
## mri1_L1                       -0.1331 0.1349    -0.3975  -0.1331     0.1312
## class_age_at_bx6070_L1         0.2623 0.1773    -0.0852   0.2623     0.6098
## class_age_at_bx70_L1           0.5376 0.1848     0.1755   0.5376     0.8997
## after_txrt_L1                  0.8004 0.2872     0.2375   0.8004     1.3633
## after_txunk_L1                 0.1612 0.2194    -0.2687   0.1612     0.5912
## f1time_to_psa:grade_group2_L1 -3.6727 1.0906    -5.8103  -3.6727    -1.5352
## f1time_to_psa:grade_group3_L1 -1.3250 1.2754    -3.8248  -1.3250     1.1748
## f1time_to_psa:grade_group4_L1 -2.2551 1.8558    -5.8924  -2.2551     1.3822
## f1time_to_psa:grade_group5_L1  1.2983 1.5668    -1.7725   1.2983     4.3691
## f2time_to_psa:grade_group2_L1 -3.2060 2.6914    -8.4810  -3.2060     2.0691
## f2time_to_psa:grade_group3_L1 -5.2278 3.1932   -11.4865  -5.2278     1.0308
## f2time_to_psa:grade_group4_L1  2.3189 4.2829    -6.0754   2.3189    10.7131
## f2time_to_psa:grade_group5_L1  1.6850 4.2736    -6.6911   1.6850    10.0610
## Res. err. (variance)           0.4032 0.0157     0.3735   0.4028     0.4352
## 
## Random effects variance-covariance (L1)
##                                       mean      sd 0.025quant 0.5quant
## Intercept_L1                        3.4037  2.4991     1.1437   2.8060
## f1time_to_psa_L1                   13.0074  3.6435     7.5592  12.5538
## f2time_to_psa_L1                  127.2802 23.6713    85.3376 125.8892
## Intercept_L1:f1time_to_psa_L1       0.5998  2.4390    -4.7301   1.1661
## Intercept_L1:f2time_to_psa_L1      16.6927  6.0326     7.9048  15.8057
## f1time_to_psa_L1:f2time_to_psa_L1  25.8435  8.4248     8.7672  25.9323
##                                   0.975quant
## Intercept_L1                          8.9143
## f1time_to_psa_L1                     21.6029
## f2time_to_psa_L1                    177.4183
## Intercept_L1:f1time_to_psa_L1         3.0623
## Intercept_L1:f2time_to_psa_L1        31.2052
## f1time_to_psa_L1:f2time_to_psa_L1    42.6080
## 
## Survival outcome (S1)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1  0.0556 0.0245     0.0229   0.0505     0.1180
## grade_group2_S1              3.1912 0.8173     1.5894   3.1912     4.7931
## grade_group3_S1              2.9709 0.9976     1.0157   2.9709     4.9261
## grade_group4_S1              2.0540 1.4025    -0.6948   2.0540     4.8029
## grade_group5_S1              3.4948 1.3242     0.8994   3.4948     6.0902
## mri1_S1                      1.5195 0.8084    -0.0649   1.5195     3.1039
## class_age_at_bx6070_S1       0.2104 0.9087    -1.5706   0.2104     1.9914
## class_age_at_bx70_S1         0.6940 0.9793    -1.2253   0.6940     2.6134
## after_txrt_S1               -1.1795 1.5389    -4.1957  -1.1795     1.8367
## after_txunk_S1              -1.3235 1.1039    -3.4870  -1.3235     0.8400
## 
## Frailty term variance (S1)
##                   mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 11.6985 2.0526     7.9699  11.6206     15.993
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.0521 0.0218     0.0198   0.0489     0.1043
## grade_group2_S2              3.2262 0.8534     1.5535   3.2262     4.8989
## grade_group3_S2              3.1058 1.0443     1.0590   3.1058     5.1526
## grade_group4_S2              1.9393 1.4725    -0.9467   1.9393     4.8253
## grade_group5_S2              3.5001 1.3827     0.7900   3.5001     6.2101
## mri1_S2                      1.9643 0.8487     0.3008   1.9643     3.6278
## class_age_at_bx6070_S2       0.1497 0.9523    -1.7168   0.1497     2.0163
## class_age_at_bx70_S2         0.5012 1.0269    -1.5114   0.5012     2.5139
## after_txrt_S2               -0.9977 1.5937    -4.1214  -0.9977     2.1260
## after_txunk_S2               1.0270 1.1496    -1.2262   1.0270     3.2802
## 
## Association longitudinal - survival
##             mean     sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.0365 0.1443    -0.3259  -0.0347     0.2422
## CV_L1_S2  0.1891 0.1719    -0.1486   0.1888     0.5282
## 
## Association survival - survival
##                    mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.043 0.0926      0.864   1.0419     1.2286
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -5532.715                             -5520.481 
## 
## Deviance Information Criterion:  -894.2573
## Widely applicable Bayesian information criterion:  -1032.216
## Computation time: 6.85 seconds

Modelling : Only positive (>=0) time to PSAs

surv_long_ppsa_data <- surv_long_data[surv_long_data$time_to_psa >= 0, ]
# First joint model for longitudinal marker PSA
LM1_v2 <- joint(formLong=log_psa_results ~ time_to_psa + (1 + time_to_psa | id), 
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), # To make calculation faster, use only the mean for the hyper-parameter distributions
             dataLong=surv_long_ppsa_data
             )
summary(LM1_v2)
## Longitudinal outcome (gaussian)
##                         mean     sd 0.025quant 0.5quant 0.975quant
## Intercept             1.1826 0.0920     1.0023   1.1826     1.3628
## time_to_psa          -0.0156 0.0094    -0.0340  -0.0156     0.0027
## Res. err. (variance)  0.3320 0.0137     0.3060   0.3317     0.3599
## 
## Random effects variance-covariance (L1)
##                          mean     sd 0.025quant 0.5quant 0.975quant
## Intercept              0.8729 0.1304     0.6507   0.8636     1.1581
## time_to_psa            0.0098 0.0013     0.0075   0.0097     0.0124
## Intercept:time_to_psa -0.0121 0.0099    -0.0326  -0.0118     0.0067
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -1692.643                             -1688.679 
## 
## Deviance Information Criterion:  2434.868
## Widely applicable Bayesian information criterion:  2655.689
## Computation time: 1.38 seconds
# 643:42 , 1030:41 , 321:37 , 553:37 ,  938:36 ,  444:36 , 405:33 -> these are some patients with large enough data
patientID = 25 # pick one
max_psa_patient <- surv_long_ppsa_data$id[which.max(surv_long_ppsa_data$PSA_results)]
patient_counts <- table(surv_long_ppsa_data$id)
max_count <- max(patient_counts)
patients_with_max_count <- names(patient_counts[patient_counts == max_count])
patientID = patients_with_max_count

ND <- surv_long_ppsa_data[surv_long_ppsa_data$id==patientID,]
P1 <- predict(LM1_v2, ND, id="id", horizon=max(surv_long_ppsa_data$time_to_psa))$PredL # Make Linear prediction
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
# To plot, should run from here
plot(P1$time_to_psa, P1$quant0.5, type="l", lwd=2, ylim=range(c(P1$quant0.025, P1$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P1$time_to_psa, P1$quant0.025, lty=2)
lines(P1$time_to_psa, P1$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19) # to here , pch=size of the dots

# use splines?
NSplines <- ns(surv_long_ppsa_data$time_to_psa, knots=c(25)) # natural cubic splines , knots, the starting rise time, look at time to psa summary
f1 <- function(x) predict(NSplines, x)[,1] # first basis
f2 <- function(x) predict(NSplines, x)[,2] # second basis
# check splines
curve(f1, xlim=range(surv_long_ppsa_data$time_to_psa), ylim=c(-1,1))
curve(f2, xlim=range(surv_long_ppsa_data$time_to_psa), add=T)

# Second joint model for the longitudinal data, this time accounting for all
LM2_v2 <- joint(formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), 
             dataLong=surv_long_ppsa_data
            )
summary(LM2_v2)
## Longitudinal outcome (gaussian)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Intercept                   0.9918 0.3354     0.3343   0.9918     1.6492
## f1time_to_psa              -1.7051 0.5043    -2.6936  -1.7051    -0.7167
## f2time_to_psa               0.5531 0.3341    -0.1017   0.5531     1.2078
## grade_group2               -0.6071 0.2219    -1.0421  -0.6071    -0.1722
## grade_group3               -0.6836 0.2681    -1.2090  -0.6836    -0.1582
## grade_group4               -0.9504 0.3801    -1.6954  -0.9504    -0.2055
## grade_group5               -0.8410 0.3571    -1.5409  -0.8410    -0.1410
## mri1                        0.0110 0.1833    -0.3482   0.0110     0.3702
## class_age_at_bx6070         0.1511 0.2106    -0.2617   0.1511     0.5639
## class_age_at_bx70           0.6134 0.2266     0.1694   0.6134     1.0574
## after_txrt                  1.3934 0.3777     0.6532   1.3934     2.1336
## after_txunk                 0.4360 0.2934    -0.1391   0.4360     1.0111
## f1time_to_psa:grade_group2  0.4454 0.7389    -1.0028   0.4454     1.8935
## f1time_to_psa:grade_group3  0.4420 0.8772    -1.2772   0.4420     2.1612
## f1time_to_psa:grade_group4  1.4517 1.2481    -0.9946   1.4517     3.8979
## f1time_to_psa:grade_group5  3.1269 1.4766     0.2329   3.1269     6.0209
## f2time_to_psa:grade_group2  2.3665 0.5431     1.3021   2.3665     3.4309
## f2time_to_psa:grade_group3  1.9749 0.5474     0.9019   1.9749     3.0478
## f2time_to_psa:grade_group4 -0.8102 0.6849    -2.1527  -0.8102     0.5322
## f2time_to_psa:grade_group5  0.7821 2.2827    -3.6918   0.7821     5.2561
## Res. err. (variance)        0.3161 0.0133     0.2909   0.3158     0.3432
## 
## Random effects variance-covariance (L1)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Intercept                    0.9433 0.4533     0.5340   0.8485     1.8545
## f1time_to_psa                8.0354 3.7853     3.3455   7.4476    16.9789
## f2time_to_psa                1.1070 0.9675     0.1861   0.8312     3.6266
## Intercept:f1time_to_psa     -1.0611 1.0419    -3.2209  -0.9695     0.5956
## Intercept:f2time_to_psa      0.5666 0.5237    -0.0651   0.4643     1.6237
## f1time_to_psa:f2time_to_psa -2.0608 1.3205    -5.1128  -1.8495    -0.0936
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -1554.261                             -1547.541 
## 
## Deviance Information Criterion:  2392.827
## Widely applicable Bayesian information criterion:  2566.361
## Computation time: 2.11 seconds
# Again, pick one patient (here the same)
ND2 <- surv_long_ppsa_data[surv_long_ppsa_data$id==patientID,] # observed vs. fitted for a couple individuals.
P2 <- predict(LM2_v2, ND, id="id", horizon=max(surv_long_ppsa_data$time_to_psa))$PredL
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
plot(P2$time_to_psa, P2$quant0.5, type="l", lwd=2, ylim=range(c(P2$quant0.025, P2$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P2$time_to_psa, P2$quant0.025, lty=2)
lines(P2$time_to_psa, P2$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19)

# Third joint model, combinning 2 survival modals and the longitudinal one 
LJM1_v2 <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx  + (1|id),
                          inla.surv(drfs_time, drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx),
            formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
            id="id",
            timeVar="time_to_psa",
            basRisk=c("rw2", "rw2"), # random walk
            assocSurv=TRUE,
            family="gaussian", 
            assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
            control=list(int.strategy="eb"), 
            dataSurv=list(surv_nolong_data[which(surv_nolong_data$id %in% surv_long_ppsa_data$id),]), 
            dataLong=surv_long_ppsa_data
            )
## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ grade_group
## + : The hyperparameters skewness correction seems abnormal, this can be a sign
## of an ill-defined model and/or issues with the fit.
## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ grade_group
## + : Internal correlation between hyperparameters is abnormally high, this is a
## sign of identifiability issues / ill-defined model.
summary(LJM1_v2)
## Longitudinal outcome (gaussian)
##                                  mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1                   0.9918 0.3350     0.3353   0.9918     1.6483
## f1time_to_psa_L1              -1.7289 0.5075    -2.7235  -1.7289    -0.7343
## f2time_to_psa_L1               0.5008 0.3293    -0.1446   0.5008     1.1461
## grade_group2_L1               -0.6026 0.2214    -1.0365  -0.6026    -0.1687
## grade_group3_L1               -0.6836 0.2674    -1.2077  -0.6836    -0.1594
## grade_group4_L1               -0.9430 0.3792    -1.6862  -0.9430    -0.1999
## grade_group5_L1               -0.8385 0.3564    -1.5370  -0.8385    -0.1401
## mri1_L1                        0.0130 0.1830    -0.3456   0.0130     0.3717
## class_age_at_bx6070_L1         0.1520 0.2104    -0.2604   0.1520     0.5644
## class_age_at_bx70_L1           0.6159 0.2263     0.1724   0.6159     1.0595
## after_txrt_L1                  1.3923 0.3774     0.6527   1.3923     2.1319
## after_txunk_L1                 0.4313 0.2930    -0.1431   0.4313     1.0056
## f1time_to_psa:grade_group2_L1  0.4429 0.7436    -1.0144   0.4429     1.9003
## f1time_to_psa:grade_group3_L1  0.4309 0.8828    -1.2994   0.4309     2.1612
## f1time_to_psa:grade_group4_L1  1.4457 1.2564    -1.0169   1.4457     3.9083
## f1time_to_psa:grade_group5_L1  3.1416 1.4851     0.2307   3.1416     6.0524
## f2time_to_psa:grade_group2_L1  2.4056 0.5360     1.3551   2.4056     3.4561
## f2time_to_psa:grade_group3_L1  1.9442 0.5374     0.8908   1.9442     2.9975
## f2time_to_psa:grade_group4_L1 -0.7062 0.6706    -2.0206  -0.7062     0.6081
## f2time_to_psa:grade_group5_L1  0.8545 2.2890    -3.6318   0.8545     5.3407
## Res. err. (variance)           0.3161 0.0134     0.2909   0.3157     0.3437
## 
## Random effects variance-covariance (L1)
##                                           mean           sd 0.025quant 0.5quant
## Intercept_L1                         21877.233     368288.5     0.6297   9.0712
## f1time_to_psa_L1                  43925604.510 1520625231.5     0.0427   8.3465
## f2time_to_psa_L1                     11684.163     435821.5     0.0001   0.5875
## Intercept_L1:f1time_to_psa_L1       468908.831   14173708.5 -3121.9990  -1.2575
## Intercept_L1:f2time_to_psa_L1         5490.018     115292.2   -46.2756   0.3083
## f1time_to_psa_L1:f2time_to_psa_L1   104633.027    2110109.5   -42.7303  -0.0440
##                                    0.975quant
## Intercept_L1                        44647.276
## f1time_to_psa_L1                  3049344.962
## f2time_to_psa_L1                     5644.743
## Intercept_L1:f1time_to_psa_L1      185387.372
## Intercept_L1:f2time_to_psa_L1        9853.974
## f1time_to_psa_L1:f2time_to_psa_L1   81180.934
## 
## Survival outcome (S1)
##                                mean       sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 14.8147 178.1015     0.0000   0.0087    55.7707
## grade_group2_S1              2.9313   0.7781     1.4063   2.9313     4.4564
## grade_group3_S1              2.7160   0.9494     0.8552   2.7160     4.5768
## grade_group4_S1              1.8039   1.3340    -0.8107   1.8039     4.4186
## grade_group5_S1              3.2865   1.2613     0.8144   3.2865     5.7587
## mri1_S1                      1.4565   0.7689    -0.0506   1.4565     2.9635
## class_age_at_bx6070_S1       0.2248   0.8650    -1.4705   0.2248     1.9200
## class_age_at_bx70_S1         0.7894   0.9320    -1.0373   0.7894     2.6160
## after_txrt_S1               -0.7927   1.4678    -3.6695  -0.7927     2.0841
## after_txunk_S1              -1.1563   1.0562    -3.2263  -1.1563     0.9138
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 11.817 6.9784     3.1772  10.2328    29.9483
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.3268 0.8045     0.0049   0.0955     2.2095
## grade_group2_S2              3.1092 0.8563     1.4309   3.1092     4.7875
## grade_group3_S2              3.0022 1.0478     0.9487   3.0022     5.0558
## grade_group4_S2              1.8863 1.4775    -1.0095   1.8863     4.7821
## grade_group5_S2              3.5006 1.3886     0.7790   3.5006     6.2222
## mri1_S2                      1.9778 0.8510     0.3098   1.9778     3.6458
## class_age_at_bx6070_S2       0.2044 0.9557    -1.6688   0.2044     2.0777
## class_age_at_bx70_S2         0.6324 1.0307    -1.3877   0.6324     2.6526
## after_txrt_S2               -0.5736 1.6023    -3.7140  -0.5736     2.5667
## after_txunk_S2               1.2060 1.1589    -1.0654   1.2060     3.4774
## 
## Association longitudinal - survival
##             mean     sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.1200 1.8057    -3.5926  -0.1475     3.5162
## CV_L1_S2  0.0141 1.4056    -2.6902  -0.0070     2.8434
## 
## Association survival - survival
##                     mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.1452 0.4414     0.3014   1.1368      2.039
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -4971.973                             -4959.739 
## 
## Deviance Information Criterion:  -1988.515
## Widely applicable Bayesian information criterion:  -2024.257
## Computation time: 6.67 seconds

ALL T2PSA By GG

gg <- 2
id_rle <- rle(surv_long_data$id)
new_id <- rep(seq_along(id_rle$lengths), id_rle$lengths)

# -> Reassign the new IDs to the id column in surv_long_data
longdata <- surv_long_data
longdata$id <- new_id

# Surv data to be used in the latter join model 
surdata <- longdata[!duplicated(longdata$id), ]

  
# Third joint model, combinning 2 survival modals and the longitudinal one 
LJM1_tmp <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ mri+class_age_at_bx+after_tx  + (1|id),
                          inla.surv(drfs_time, drfs_status) ~ mri+class_age_at_bx+after_tx),
            formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
            id="id",
            timeVar="time_to_psa",
            basRisk=c("rw2", "rw2"), # random walk
            assocSurv=TRUE,
            family="gaussian", 
            assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
            control=list(int.strategy="eb"), 
            dataSurv=list(surdata[(surdata$id %in% longdata$id) & (surdata$grade_group == gg),]), 
            dataLong=longdata[longdata$grade_group == gg,]
            )
## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ mri + : Max
## id is 117 but there are only 39 individuals with longitudinal records, I'm
## reassigning id from 1 to 39
cat("Current GG is ", gg)
## Current GG is  2
summary(LJM1_tmp)
## Longitudinal outcome (gaussian)
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1             0.7770 0.2469     0.2931   0.7770     1.2609
## f1time_to_psa_L1        -0.7843 0.2614    -1.2966  -0.7843    -0.2720
## f2time_to_psa_L1         2.1851 0.4053     1.3906   2.1851     2.9795
## mri1_L1                  0.2193 0.1880    -0.1492   0.2193     0.5878
## class_age_at_bx6070_L1  -0.1058 0.2059    -0.5093  -0.1058     0.2978
## class_age_at_bx70_L1     0.2007 0.2377    -0.2651   0.2007     0.6666
## after_txrt_L1            0.9697 0.4221     0.1423   0.9697     1.7971
## after_txunk_L1           0.2544 0.2542    -0.2437   0.2544     0.7526
## f1time_to_psa:mri1_L1   -4.6636 4.0228   -12.5480  -4.6636     3.2209
## f2time_to_psa:mri1_L1  -10.8246 7.7175   -25.9506 -10.8246     4.3014
## Res. err. (variance)     0.4590 0.0314     0.4005   0.4579     0.5239
## 
## Random effects variance-covariance (L1)
##                                      mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1                       0.2262 0.1253     0.1040   0.1986     0.5259
## f1time_to_psa_L1                   0.9705 1.4628     0.1714   0.6297     4.1348
## f2time_to_psa_L1                   0.1859 0.1518     0.0338   0.1424     0.5886
## Intercept_L1:f1time_to_psa_L1     -0.0120 0.3152    -0.5829   0.0194     0.3672
## Intercept_L1:f2time_to_psa_L1      0.0155 0.1170    -0.2132   0.0081     0.2693
## f1time_to_psa_L1:f2time_to_psa_L1  0.1382 0.3983    -0.2008   0.0224     1.1353
## 
## Survival outcome (S1)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1  0.0284 0.0554     0.0007   0.0110     0.1709
## mri1_S1                      0.4376 0.8389    -1.2066   0.4376     2.0817
## class_age_at_bx6070_S1      -0.3112 0.8938    -2.0630  -0.3112     1.4407
## class_age_at_bx70_S1        -0.7998 1.0489    -2.8557  -0.7998     1.2561
## after_txrt_S1               -1.9794 1.8251    -5.5565  -1.9794     1.5977
## after_txunk_S1              -0.2459 1.0790    -2.3606  -0.2459     1.8688
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 4.3621 1.9741      1.661   3.9819      9.316
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.0504 0.0811     0.0023   0.0242     0.2683
## mri1_S2                      0.4830 0.9986    -1.4742   0.4830     2.4403
## class_age_at_bx6070_S2      -0.4422 1.0539    -2.5078  -0.4422     1.6233
## class_age_at_bx70_S2        -1.0144 1.2368    -3.4384  -1.0144     1.4097
## after_txrt_S2               -2.4696 2.1249    -6.6343  -2.4696     1.6950
## after_txunk_S2               2.0236 1.2601    -0.4462   2.0236     4.4934
## 
## Association longitudinal - survival
##             mean     sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.6431 0.8051    -2.2047  -0.6511     0.9655
## CV_L1_S2 -0.1483 0.8807    -1.8652  -0.1541     1.6025
## 
## Association survival - survival
##                     mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.1861 0.1991     0.8014   1.1836     1.5852
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -1700.249                             -1688.015 
## 
## Deviance Information Criterion:  -30.77604
## Widely applicable Bayesian information criterion:  -88.97328
## Computation time: 2.96 seconds

ONLY +T2PSA By GG for (gg in c(‘1’,‘2’,‘3’,‘4’,‘5’)) {

Third joint model, combinning 2 survival modals and the longitudinal one LJM2_tmp <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ mri+class_age_at_bx+after_tx + (1|id), inla.surv(drfs_time, drfs_status) ~ mri+class_age_at_bx+after_tx), formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id), id=“id”, timeVar=“time_to_psa”, basRisk=c(“rw2”, “rw2”), # random walk assocSurv=TRUE, family=“gaussian”, assoc=c(“CV”, “CV”), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t control=list(int.strategy=“eb”), dataSurv=list(surv_nolong_data[(surv_nolong_data\(id %in% surv_long_ppsa_data\)id) & (surv_nolong_data\(grade_group == gg),]), dataLong=surv_long_ppsa_data[surv_long_ppsa_data\)grade_group == gg,] ) cat(“Current GG is”, gg)

summary(LJM2_tmp)

}