desc_stat

Packages
- Installation
- Loading
Import data
Descriptive stat
Modelling

Packages

Installation

# No packages to install, so far

Loading

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Import data

abx <- read_csv("datafolder/bx.csv", col_select = -...1)

## New names:
## Rows: 12277 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (5): id, grade_group, mri, time_to_mri, age_at_bx dttm (3): bx_date, mri_date,
## last_data_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

apsa <- read_csv("datafolder/psa.csv", col_select = -...1)

## New names:
## Rows: 194195 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (3): id, psa_results, time_to_psa dttm (1): psa_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

abcr <- read_csv("datafolder/bcr.csv", col_select = -...1)

## New names:
## Rows: 2684 Columns: 19
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): after_tx dbl (16): id, grade_group, mri, time_to_tx, tx_to_bcr, tx_to_end,
## time_to_b... dttm (2): tx_date, bcr_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

atfs <- read_csv("datafolder/tfs.csv", col_select = -...1)

## New names:
## Rows: 12277 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): type_of_treatment dbl (7): id, grade_group, mri, tfs_status, tfs_time,
## tfs5_status, tfs5_time dttm (1): treatment_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`

Descriptive stat

Biopsy

summary(abx)

##        id           bx_date                        grade_group   
##  Min.   :    1   Min.   :1993-09-09 00:00:00.00   Min.   :1.000  
##  1st Qu.: 3129   1st Qu.:2008-11-14 00:00:00.00   1st Qu.:1.000  
##  Median : 6265   Median :2012-11-27 00:00:00.00   Median :2.000  
##  Mean   : 6269   Mean   :2012-02-01 22:53:43.77   Mean   :2.495  
##  3rd Qu.: 9409   3rd Qu.:2016-03-31 00:00:00.00   3rd Qu.:3.000  
##  Max.   :12545   Max.   :2019-06-13 00:00:00.00   Max.   :5.000  
##                                                                  
##     mri_date                           mri          time_to_mri     
##  Min.   :1999-08-10 00:00:00.00   Min.   :0.0000   Min.   :-12.000  
##  1st Qu.:2016-08-03 00:00:00.00   1st Qu.:0.0000   1st Qu.: -1.000  
##  Median :2017-09-12 00:00:00.00   Median :0.0000   Median : -1.000  
##  Mean   :2017-05-01 01:55:21.68   Mean   :0.1454   Mean   : -1.168  
##  3rd Qu.:2018-08-09 00:00:00.00   3rd Qu.:0.0000   3rd Qu.: -1.000  
##  Max.   :2019-06-07 00:00:00.00   Max.   :1.0000   Max.   : 28.000  
##  NA's   :10492                                     NA's   :10492    
##    age_at_bx     last_data_date                  
##  Min.   :37.00   Min.   :1994-11-07 00:00:00.00  
##  1st Qu.:63.00   1st Qu.:2017-02-20 13:37:00.00  
##  Median :68.00   Median :2020-09-14 09:12:00.00  
##  Mean   :67.71   Mean   :2018-05-02 13:55:13.81  
##  3rd Qu.:73.00   3rd Qu.:2021-01-20 11:19:00.00  
##  Max.   :80.00   Max.   :2021-05-05 00:00:00.00  
##

# Bar plot for grade_group
barplot(table(abx$grade_group), main = "Distribution of grade_group", xlab = "Grade Group", ylab = "Count")

# Box plot for time_to_mri
boxplot(abx$time_to_mri, main = "Distribution of time_to_mri", ylab = "Time to MRI")

# Histogram for age_at_bx
hist(abx$age_at_bx, main = "Distribution of Age at Biopsy", xlab = "Age", ylab = "Frequency")

# Histogram for mri
hist(abx$mri, main = "Distribution of MRI", xlab = "MRI", ylab = "Frequency")

# Convert datetime variables to Date format
abx$bx_date <- as.Date(abx$bx_date)
abx$mri_date <- as.Date(abx$mri_date)

# Plot bx_date
ggplot(abx, aes(x = bx_date)) + 
  geom_histogram(binwidth = 30, color = "black", fill = "blue") + 
  labs(title = "Distribution of bx_date", x = "Date", y = "Frequency")

# Plot mri_date
ggplot(abx, aes(x = mri_date)) + 
  geom_histogram(binwidth = 30, color = "black", fill = "green") + 
  labs(title = "Distribution of mri_date", x = "Date", y = "Frequency")

## Warning: Removed 10492 rows containing non-finite values (`stat_bin()`).

PSA

summary(apsa)

##        id         psa_results          psa_date                     
##  Min.   :    1   Min.   :    0.00   Min.   :1998-05-26 07:01:00.00  
##  1st Qu.: 3031   1st Qu.:    0.08   1st Qu.:2011-01-03 11:56:00.00  
##  Median : 6098   Median :    1.10   Median :2015-01-21 10:17:00.00  
##  Mean   : 6096   Mean   :   14.77   Mean   :2014-04-03 21:36:56.91  
##  3rd Qu.: 9119   3rd Qu.:    5.80   3rd Qu.:2018-03-13 07:45:00.00  
##  Max.   :12227   Max.   :76800.00   Max.   :2021-04-08 08:09:00.00  
##   time_to_psa     
##  Min.   :-244.00  
##  1st Qu.:   4.00  
##  Median :  25.00  
##  Mean   :  30.67  
##  3rd Qu.:  60.00  
##  Max.   : 284.00

# Convert datetime variable to POSIXct format
apsa$psa_date <- as.POSIXct(apsa$psa_date)

# Plot frequency of psa_date
apsa$year <- format(apsa$psa_date, "%Y")
hist(as.numeric(apsa$year), main = "Frequency of PSA tests", xlab = "Date", ylab = "Frequency", breaks = 30)

# Apply log transformation to psa_results
apsa$log_psa_results <- log(apsa$psa_results + 1)
hist(as.numeric(apsa$log_psa_results)[apsa$time_to_psa==0], main = "Dist LOG PSA at baseline (ref)", xlab = "log(PSA+1)", ylab = "Frequency", breaks = 30)

# Calculate the average log_psa_result
average_log_psa <- mean(apsa$log_psa_results[apsa$time_to_psa == 0])

# Center the log_psa_result around its average
centered_log_psa <- apsa$log_psa_results - average_log_psa

# Plot the histogram
hist(centered_log_psa, main = "Centered Dist LOG PSA at baseline", xlab = "log(PSA+1) - avg", ylab = "Frequency", breaks = 30)

hist(as.numeric(apsa$psa_results)[apsa$time_to_psa==0 & apsa$psa_results<100], main = "Dist PSA at baseline", xlab = "PSA < 100 at baseline", ylab = "Frequency", breaks = 10)

# Plot histogram for Time to PSA
hist(apsa$time_to_psa, main = "Distribution of Time to PSA", xlab = "Time to PSA", ylab = "Frequency")

# Plot histogram for + Time to PSA
hist(apsa$time_to_psa[apsa$time_to_psa>=0], main = "Distribution of +Time to PSA", xlab = "+Time to PSA", ylab = "Frequency")

# Plot histogram for + Time to PSA
hist(log(apsa$time_to_psa+1)[apsa$time_to_psa>=0], main = "Distribution of LOG +Time to PSA", xlab = "+Time to PSA", ylab = "Frequency")

## Warning in log(apsa$time_to_psa + 1): NaNs produced

Spaghetti Plots

# Plot log(psa_results + 0.1) against time_to_psa as spaghetti plot
matplot(apsa$time_to_psa, apsa$log_psa_results, type = "l", main = "Spaghetti Plot of Log Transformed PSA Results Over Time", xlab = "Time to psa", ylab = "Log(PSA + 0.1)", col = 1:nrow(apsa), lty = 1)

# Plot log(psa_results + 0.1) against time_to_psa>0 as spaghetti plot
pos_apsa <- apsa[apsa$time_to_psa>0,]
matplot(pos_apsa$time_to_psa, pos_apsa$log_psa_results, type = "l", main = "Spaghetti Plot of Log Post-Bx Transformed PSA Results Over Time", xlab = "Time to psa", ylab = "Log(PSA + 0.1)", col = 1:nrow(pos_apsa), lty = 1)

# ggplot2, nicer plots
meta_apsa <- merge(pos_apsa,
                   abx[, c('id', 'grade_group', 'mri','age_at_bx')], on='id', all=TRUE)
p <- ggplot(data = meta_apsa, aes(x = time_to_psa, y = log_psa_results, group = id, colour=grade_group))

# all
p + 
  geom_line() + 
  stat_smooth(aes(group = 1), color='red') + 
  stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3)

## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).

## Warning: Removed 458 rows containing non-finite values (`stat_summary()`).

## Warning: Removed 458 rows containing missing values (`geom_line()`).

# By GG
p + 
  geom_line() + 
  stat_smooth(aes(group = 1), color='red') + 
  #stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3, color='yellow')+ 
  facet_grid(. ~ grade_group)

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Removed 458 rows containing missing values (`geom_line()`).

p + 
  geom_line() + 
  stat_smooth(aes(group = 1), color='red') + 
  #stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3, color='yellow')+ 
  facet_grid(. ~ mri)

## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Removed 458 rows containing missing values (`geom_line()`).

Biochemical recurrence (BCR)

summary(abcr)

##        id         grade_group         mri           after_tx        
##  Min.   :    2   Min.   :1.000   Min.   :0.0000   Length:2684       
##  1st Qu.: 2921   1st Qu.:2.000   1st Qu.:0.0000   Class :character  
##  Median : 6002   Median :3.000   Median :0.0000   Mode  :character  
##  Mean   : 5989   Mean   :2.865   Mean   :0.1073                     
##  3rd Qu.: 8993   3rd Qu.:4.000   3rd Qu.:0.0000                     
##  Max.   :12224   Max.   :5.000   Max.   :1.0000                     
##     tx_date                         time_to_tx       tx_to_bcr     
##  Min.   :1999-09-06 08:44:00.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.:2009-09-09 18:00:00.00   1st Qu.:  3.00   1st Qu.: 16.00  
##  Median :2012-11-30 00:00:00.00   Median :  4.00   Median : 34.00  
##  Mean   :2012-07-06 19:29:22.77   Mean   : 11.91   Mean   : 42.72  
##  3rd Qu.:2015-11-16 06:00:00.00   3rd Qu.:  7.00   3rd Qu.: 60.00  
##  Max.   :2020-04-16 00:00:00.00   Max.   :177.00   Max.   :194.00  
##    tx_to_end         bcr_date                       time_to_bcr    
##  Min.   :  2.00   Min.   :2000-07-24 08:00:00.00   Min.   :  2.00  
##  1st Qu.: 41.00   1st Qu.:2012-12-18 07:43:30.00   1st Qu.: 24.00  
##  Median : 69.00   Median :2016-01-17 00:00:00.00   Median : 44.00  
##  Mean   : 75.92   Mean   :2015-06-15 20:45:10.81   Mean   : 54.63  
##  3rd Qu.:107.00   3rd Qu.:2018-07-30 20:15:45.00   3rd Qu.: 74.00  
##  Max.   :238.00   Max.   :2021-04-07 11:10:00.00   Max.   :226.00  
##   time_to_end       rfs_status    rfs_time       drfs_status   drfs_time     
##  Min.   :  3.00   Min.   :1    Min.   :  0.00   Min.   :1    Min.   :  2.00  
##  1st Qu.: 50.00   1st Qu.:1    1st Qu.: 16.00   1st Qu.:1    1st Qu.: 24.00  
##  Median : 80.00   Median :1    Median : 34.00   Median :1    Median : 44.00  
##  Mean   : 87.83   Mean   :1    Mean   : 42.72   Mean   :1    Mean   : 54.63  
##  3rd Qu.:120.00   3rd Qu.:1    3rd Qu.: 60.00   3rd Qu.:1    3rd Qu.: 74.00  
##  Max.   :265.00   Max.   :1    Max.   :194.00   Max.   :1    Max.   :226.00  
##   rfs5_status       rfs5_time      drfs5_status      drfs5_time   
##  Min.   :0.0000   Min.   : 0.00   Min.   :0.0000   Min.   : 2.00  
##  1st Qu.:1.0000   1st Qu.:16.00   1st Qu.:0.0000   1st Qu.:24.00  
##  Median :1.0000   Median :34.00   Median :1.0000   Median :44.00  
##  Mean   :0.7511   Mean   :34.78   Mean   :0.6505   Mean   :41.06  
##  3rd Qu.:1.0000   3rd Qu.:60.00   3rd Qu.:1.0000   3rd Qu.:60.00  
##  Max.   :1.0000   Max.   :60.00   Max.   :1.0000   Max.   :60.00

barplot(table(abcr$grade_group), main = "Distribution of grade_group", xlab = "Grade Group", ylab = "Count")

table(abcr$grade_group)

## 
##   1   2   3   4   5 
## 606 555 638 365 520

Treatment

summary(atfs)

##        id         grade_group         mri        
##  Min.   :    1   Min.   :1.000   Min.   :0.0000  
##  1st Qu.: 3129   1st Qu.:1.000   1st Qu.:0.0000  
##  Median : 6265   Median :2.000   Median :0.0000  
##  Mean   : 6269   Mean   :2.495   Mean   :0.1454  
##  3rd Qu.: 9409   3rd Qu.:3.000   3rd Qu.:0.0000  
##  Max.   :12545   Max.   :5.000   Max.   :1.0000  
##                                                  
##  treatment_date                   type_of_treatment    tfs_status    
##  Min.   :1999-06-07 11:43:00.00   Length:12277       Min.   :0.0000  
##  1st Qu.:2010-10-26 00:00:00.00   Class :character   1st Qu.:1.0000  
##  Median :2014-02-20 00:00:00.00   Mode  :character   Median :1.0000  
##  Mean   :2013-08-09 03:45:34.11                      Mean   :0.7764  
##  3rd Qu.:2016-12-21 00:00:00.00                      3rd Qu.:1.0000  
##  Max.   :2021-04-29 00:00:00.00                      Max.   :1.0000  
##  NA's   :2745                                                        
##     tfs_time       tfs5_status       tfs5_time   
##  Min.   :  0.00   Min.   :0.0000   Min.   : 0.0  
##  1st Qu.:  2.00   1st Qu.:0.0000   1st Qu.: 2.0  
##  Median :  4.00   Median :1.0000   Median : 4.0  
##  Mean   : 24.66   Mean   :0.7327   Mean   :16.4  
##  3rd Qu.: 25.00   3rd Qu.:1.0000   3rd Qu.:25.0  
##  Max.   :259.00   Max.   :1.0000   Max.   :60.0  
##

table(atfs$grade_group)

## 
##    1    2    3    4    5 
## 3652 3313 2503 1195 1614

table(atfs$mri)

## 
##     0     1 
## 10492  1785

Modelling

Import INLA and INLAjoint

library(INLA)

## Loading required package: Matrix

## Loading required package: sp

## This is INLA_24.02.09 built 2024-02-09 03:43:24 UTC.
##  - See www.r-inla.org/contact-us for how to get help.
##  - List available models/likelihoods/etc with inla.list.models()
##  - Use inla.doc(<NAME>) to access documentation

#inla.update(testing=TRUE)
library(INLAjoint)

## Package 'INLAjoint' version 24.2.5
## Type 'citation("INLAjoint")' for citing this R package in publications.

Data prepration

# First I have to combine (back) the tfs and bcr data in one dataframe
surv_data <- merge(abcr[, c('id', 'drfs_status', 'drfs_time', 'after_tx')],
                   abx[, c('id', 'grade_group', 'mri','age_at_bx')], on='id', all=TRUE)

surv_data <- merge(surv_data,
                   atfs[, c('id', 'type_of_treatment', 'tfs_status', 'tfs_time')], on='id', all=TRUE)
# New ID
#surv_data$id <- 1:nrow(surv_data)

# Re-Order
#surv_data <- surv_data[, c(6, 5, 4, 3, 2)]

# Remove where both time to treatment/bcr = 0 and event = 0 (this should be update when converting time to days)
all_null_tfs <- surv_data[surv_data$tfs_time==0 & surv_data$tfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs$id), ]
all_null_drfs <- surv_data[surv_data$drfs_time==0 & surv_data$drfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_drfs$id), ]

# Now if no BCR data then rfs status = 0 and rfs time = tfs time [to fill out the NULL cells]
surv_data$drfs_status <- ifelse(is.na(surv_data$drfs_status), 0, surv_data$drfs_status)
surv_data$drfs_time <- ifelse(is.na(surv_data$drfs_time), surv_data$tfs_time, surv_data$drfs_time)

# Remove patients when time to tx/bcr = 0
all_null_tfs_drfs <- surv_data[surv_data$tfs_time==0 & surv_data$drfs_time==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs_drfs$id), ]

# if the tfs_status = 1 and after tx ????

M-1: Treatment risk

M1 <- joint(formSurv =list(inla.surv(time = tfs_time, event = tfs_status) ~ -1),
            basRisk = c("rw2"),  
            dataSurv = list(surv_data))
summary(M1)

## 
## Survival outcome
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 1.4005 0.4296     0.7507    1.335     2.4293
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -25069.39                             -25069.39 
## 
## Deviance Information Criterion:  50050.01
## Widely applicable Bayesian information criterion:  50050.38
## Computation time: 1.32 seconds

plot(M1)

## $Baseline

## 
## attr(,"class")
## [1] "plot.INLAjoint" "list"

M-2: BCR risk

M2 <- joint(formSurv = list(inla.surv(time = drfs_time, event = drfs_status) ~ -1),
            basRisk = c("rw2"),
            dataSurv = list(surv_data))
summary(M2)

## 
## Survival outcome
##                           mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.375 0.1596     0.1562    0.344      0.776
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -11180.02                             -11180.02 
## 
## Deviance Information Criterion:  22294.39
## Widely applicable Bayesian information criterion:  22294.2
## Computation time: 1.32 seconds

plot(M2)

## $Baseline

## 
## attr(,"class")
## [1] "plot.INLAjoint" "list"

plot(M2)$Baseline+scale_y_log10()

JM-1: Treatment-BCR joint risk model

JM1 <- joint(formSurv=list(inla.surv(time = tfs_time, event = tfs_status) ~ -1+(1|id),
                           inla.surv(time = drfs_time, event = drfs_status) ~ -1), id="id",
             basRisk=c("rw2", "rw2"), assocSurv=TRUE, NbasRisk = 30,
             
             dataSurv = list(surv_data))

## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Max id is 12541 but there are only 11577 individuals with longitudinal
## records, I'm reassigning ids

## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : The hyperparameters skewness correction seems abnormal, this can be a sign
## of an ill-defined model and/or issues with the fit.

## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Stupid local search strategy used: This can be a sign of a ill-defined
## model and/or non-informative data.

summary(JM1)

## 
## Survival outcome (S1)
##                              mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 2.004 0.5085     1.1858   1.9437     3.1764
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 0.9765 0.1155     0.7769   0.9666     1.2302
## 
## Survival outcome (S2)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 1.3623 0.3328     0.8618   1.3083      2.162
## 
## Association survival - survival
##                     mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 3.7982 0.2264     3.3352    3.804     4.2261
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -36738.35                             -36738.35 
## 
## Deviance Information Criterion:  74905.69
## Widely applicable Bayesian information criterion:  31895535
## Computation time: 23.84 seconds

plot(JM1)$Baseline+scale_y_log10()

# Grade group as a factor
surv_data$grade_group <- as.factor(surv_data$grade_group)

# MRI as a factor
surv_data$mri <- as.factor(surv_data$mri)

# drfs status and tfs_statusas a factor
surv_data$drfs_status <- as.factor(surv_data$drfs_status)
surv_data$tfs_status <- as.factor(surv_data$tfs_status)

# Classify 'Age' to: a<=65 , 65<a<=70 , 70>a
# Create a new column class_age_at_bx as a factor with 3 levels
surv_data$class_age_at_bx <- cut(surv_data$age_at_bx, 
                                 breaks = c(-Inf, 60, 70, Inf), 
                                 labels = c("<60", "60-70", ">70"),
                                 include.lowest = TRUE)

# Print a summary of the new attribute
table(surv_data$class_age_at_bx)

## 
##   <60 60-70   >70 
##  2056  5096  4425

# Summary of the data
summary(surv_data)

##        id        drfs_status   drfs_time        after_tx         grade_group
##  Min.   :    1   0:8893      Min.   :  1.00   Length:11577       1:3513     
##  1st Qu.: 3054   1:2684      1st Qu.:  3.00   Class :character   2:3182     
##  Median : 6124               Median : 14.00   Mode  :character   3:2397     
##  Mean   : 6121               Mean   : 36.13                      4:1116     
##  3rd Qu.: 9178               3rd Qu.: 52.00                      5:1369     
##  Max.   :12541               Max.   :259.00                                 
##  mri        age_at_bx     type_of_treatment  tfs_status    tfs_time     
##  0:9858   Min.   :39.00   Length:11577       0:2342     Min.   :  0.00  
##  1:1719   1st Qu.:63.00   Class :character   1:9235     1st Qu.:  2.00  
##           Median :68.00   Mode  :character              Median :  4.00  
##           Mean   :67.61                                 Mean   : 26.15  
##           3rd Qu.:73.00                                 3rd Qu.: 28.00  
##           Max.   :80.00                                 Max.   :259.00  
##  class_age_at_bx
##  <60  :2056     
##  60-70:5096     
##  >70  :4425     
##                 
##                 
##

JM-2: Account for: all

JM2 <- joint(formSurv=list(inla.surv(time = tfs_time, event = tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx+(1|id),
                           inla.surv(time = drfs_time, event = drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx), id="id",
             basRisk=c("rw2", "rw2"), assocSurv=TRUE, NbasRisk = 30,
             
             dataSurv = list(surv_data))

## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Max id is 12541 but there are only 11577 individuals with longitudinal
## records, I'm reassigning ids

summary(JM2)

## 
## Survival outcome (S1)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 0.3945 0.0364     0.3440   0.3876     0.4825
## grade_group2_S1             1.3541 0.0558     1.2446   1.3541     1.4636
## grade_group3_S1             1.8129 0.0610     1.6934   1.8129     1.9325
## grade_group4_S1             1.7034 0.0785     1.5494   1.7034     1.8574
## grade_group5_S1             2.3105 0.0741     2.1653   2.3104     2.4558
## mri1_S1                     0.9038 0.0595     0.7871   0.9038     1.0206
## class_age_at_bx6070_S1      0.3481 0.0590     0.2324   0.3481     0.4637
## class_age_at_bx70_S1        0.5702 0.0609     0.4507   0.5702     0.6897
## after_txrt_S1               0.4825 0.0584     0.3680   0.4825     0.5971
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 4.4795 0.0958     4.3034   4.4749     4.6791
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.3576 0.0270     0.3143   0.3541     0.4192
## grade_group2_S2              1.5567 0.0766     1.4067   1.5567     1.7070
## grade_group3_S2              1.9750 0.0837     1.8111   1.9750     2.1392
## grade_group4_S2              1.9112 0.1076     1.7003   1.9112     2.1224
## grade_group5_S2              2.6799 0.1019     2.4803   2.6798     2.8799
## mri1_S2                      1.4163 0.0819     1.2557   1.4162     1.5771
## class_age_at_bx6070_S2       0.4339 0.0807     0.2757   0.4339     0.5923
## class_age_at_bx70_S2         0.8903 0.0834     0.7267   0.8902     1.0540
## after_txrt_S2               -2.4713 0.0815    -2.6314  -2.4712    -2.3116
## 
## Association survival - survival
##                     mean   sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.3782 0.02     1.3359   1.3791     1.4144
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                              -81382.8                              -81382.8 
## 
## Deviance Information Criterion:  141085
## Widely applicable Bayesian information criterion:  142542.4
## Computation time: 20.3 seconds

plot(JM2)$Baseline+scale_y_log10()

Note:

This plots are not so affirmative the way it is presented, we can transform them to the survival to make more sense.
The longer time to treatment the longer time to relapse, which is expected as this data include several effecting covariates (Confounder), such as age and Gleason grade group.

V2: To survival Curves

M1_surv <- joint(formSurv=list(inla.surv(time =tfs_time, event = tfs_status) ~ -1),
             basRisk=c("rw2"),  dataSurv = surv_data)
summary(M1_surv)

## 
## Survival outcome
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 1.1258 0.3376     0.6126   1.0751     1.9322
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -48079.76                             -48079.76 
## 
## Deviance Information Criterion:  96063.18
## Widely applicable Bayesian information criterion:  96069.51
## Computation time: 1.25 seconds

#plot(M1_surv)
onePatient <- surv_data[1, ]
P <- predict(M1_surv, onePatient, id="id", horizon=300, surv=TRUE)$PredS

## Warning in predict.INLAjoint(M1_surv, onePatient, id = "id", horizon = 300, :
## The fitted model has baseline risk information up until value 259 for survival
## outcome 1. Since you ask for prediction at horizon 300 I will assume constant
## baseline hazard beyond the maximum available value. Alternatively, you can use
## baselineHaz='smooth' to use splines to predict the baseline hazard (for each
## sample). Alternatively, adding 'horizon' in the control options of the inla()
## call allows to extend the baseline beyond the last observed event time (linear
## extension, less flexible than the smooth method).

## Start sampling

## Sampling done.

## Computing longitudinal predictions for individual 1 on PID: 31741

## Computing survival predictions for individual 1

# plot survival curve for the two time to event outcomes
plot(P$time, P$Surv_quant0.5, type="l", lwd=2, ylim=c(0, 1), xlab="time", ylab="survival probability")
lines(P$time, P$Surv_quant0.025, lty=2)
lines(P$time, P$Surv_quant0.975, lty=2)
# add observed event times
# sapply(surv_data[surv_data$tfs_status==1, "tfs_time"], function(x) abline(v=x, lty=3, lwd=0.5))

M2_surv <- joint(formSurv=list(inla.surv(time = drfs_time, event = drfs_status) ~ -1),
            basRisk=c("rw2"),  dataSurv = surv_data)
summary(M2_surv)

## 
## Survival outcome
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.4468 0.1685     0.2063   0.4168     0.8625
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -41586.44                             -41586.44 
## 
## Deviance Information Criterion:  83092.81
## Widely applicable Bayesian information criterion:  83097.62
## Computation time: 1.3 seconds

#plot(M2_surv)
P2 <- predict(M2_surv, onePatient, id="id", horizon=300, surv=TRUE)$PredS

## Warning in predict.INLAjoint(M2_surv, onePatient, id = "id", horizon = 300, :
## The fitted model has baseline risk information up until value 259 for survival
## outcome 1. Since you ask for prediction at horizon 300 I will assume constant
## baseline hazard beyond the maximum available value. Alternatively, you can use
## baselineHaz='smooth' to use splines to predict the baseline hazard (for each
## sample). Alternatively, adding 'horizon' in the control options of the inla()
## call allows to extend the baseline beyond the last observed event time (linear
## extension, less flexible than the smooth method).

## Start sampling

## Sampling done.

## Computing longitudinal predictions for individual 1 on PID: 31741

## Computing survival predictions for individual 1

lines(P2$time, P2$Surv_quant0.5, lwd=2, col=2)
lines(P2$time, P2$Surv_quant0.025, lty=2, col=2)
lines(P2$time, P2$Surv_quant0.975, lty=2, col=2)
# sapply(surv_data[surv_data$drfs_time==1, "drfs_time"], function(x) abline(v=x, lty=3, lwd=0.5, col=2))
legend("topright", c("Treatment", "Relapse"), lwd=2, col=1:2)

V3: Including longitudinal data

Prepare data: Merging

# Add Longitudinal data (PSA) to the surv_Data
surv_long_data <- merge(surv_data, apsa[, c('id', 'time_to_psa', 'psa_results','log_psa_results')], by='id')

# Remove NaNs from treatment related cols
# Replace NA values with 'unk'
surv_long_data$after_tx <- ifelse(is.na(surv_long_data$after_tx), 'unk', surv_long_data$after_tx)
surv_long_data$type_of_treatment <- ifelse(is.na(surv_long_data$type_of_treatment), 'unk', surv_long_data$type_of_treatment)

# Convert to factor
surv_long_data$after_tx <- factor(surv_long_data$after_tx)
surv_long_data$type_of_treatment <- factor(surv_long_data$type_of_treatment)

# Uncomment when needed, Sampling
unique_ids <- unique(surv_long_data$id)
sample_size <- round(0.01 * length(unique_ids))

# Sample X.% of the unique IDs
sampled_ids <- sample(unique_ids, sample_size, replace = FALSE)

# Subset the data based on sampled IDs
surv_long_data <- surv_long_data[surv_long_data$id %in% c(sampled_ids, 25), ]

# Reassigning ID
# -> Calculate run-length encoding of the original id column
id_rle <- rle(surv_long_data$id)

# -> Generate a sequence of IDs based on the lengths of runs in the original id column
new_id <- rep(seq_along(id_rle$lengths), id_rle$lengths)

# -> Reassign the new IDs to the id column in surv_long_data
surv_long_data$id <- new_id

# Surv data to be used in the latter join model 
surv_nolong_data <- surv_long_data[!duplicated(surv_long_data$id), ]

summary(surv_long_data)

##        id         drfs_status   drfs_time      after_tx   grade_group mri     
##  Min.   :  1.00   0:1321      Min.   :  1.00   rp : 150   1:604       0:1364  
##  1st Qu.: 29.00   1: 398      1st Qu.:  4.00   rt : 248   2:541       1: 355  
##  Median : 58.00               Median : 18.00   unk:1321   3:314               
##  Mean   : 57.39               Mean   : 40.42              4:128               
##  3rd Qu.: 86.00               3rd Qu.: 54.00              5:132               
##  Max.   :117.00               Max.   :231.00                                  
##    age_at_bx     type_of_treatment tfs_status    tfs_time      class_age_at_bx
##  Min.   :49.00   chemo:  0         0: 373     Min.   :  0.00   <60  :311      
##  1st Qu.:62.00   horm :335         1:1346     1st Qu.:  3.00   60-70:873      
##  Median :67.00   radio:556                    Median :  5.00   >70  :535      
##  Mean   :66.91   rp   :455                    Mean   : 33.24                  
##  3rd Qu.:72.00   unk  :373                    3rd Qu.: 40.00                  
##  Max.   :80.00                                Max.   :231.00                  
##   time_to_psa       psa_results       log_psa_results  
##  Min.   :-156.00   Min.   :   0.000   Min.   :0.00000  
##  1st Qu.:   4.00   1st Qu.:   0.090   1st Qu.:0.08618  
##  Median :  24.00   Median :   1.160   Median :0.77011  
##  Mean   :  31.07   Mean   :  13.614   Mean   :1.12239  
##  3rd Qu.:  57.00   3rd Qu.:   5.955   3rd Qu.:1.93946  
##  Max.   : 231.00   Max.   :3260.000   Max.   :8.08979

Modelling : all PSAs (-/+)

library(splines)

# We applied log transformation to PSA with shift=1

# Longitudinal Data should be sorted for each patient.
surv_long_data <- surv_long_data[with(surv_long_data, order(id, time_to_psa)), ]

# First joint model for longitudinal marker PSA
LM1 <- joint(formLong=log_psa_results ~ time_to_psa + (1 + time_to_psa | id), 
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), # To make calculation faster, use only the mean for the hyper-parameter distributions
             dataLong=surv_long_data
             )
summary(LM1)

## Longitudinal outcome (gaussian)
##                         mean     sd 0.025quant 0.5quant 0.975quant
## Intercept             1.3393 0.0734     1.1954   1.3393     1.4832
## time_to_psa          -0.0147 0.0089    -0.0322  -0.0147     0.0029
## Res. err. (variance)  0.4463 0.0162     0.4155   0.4460     0.4791
## 
## Random effects variance-covariance (L1)
##                          mean     sd 0.025quant 0.5quant 0.975quant
## Intercept              0.5571 0.0821     0.4157   0.5502     0.7336
## time_to_psa            0.0092 0.0012     0.0071   0.0091     0.0118
## Intercept:time_to_psa -0.0013 0.0071    -0.0158  -0.0007     0.0119
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -2252.920                             -2248.957 
## 
## Deviance Information Criterion:  3686.216
## Widely applicable Bayesian information criterion:  3743.282
## Computation time: 1.33 seconds

# 643:42 , 1030:41 , 321:37 , 553:37 ,  938:36 ,  444:36 , 405:33 -> these are some patients with large enough data
patientID = 25 # pick one
patient_counts <- table(surv_long_data$id)
max_count <- max(patient_counts)
patients_with_max_count <- names(patient_counts[patient_counts == max_count])
patientID = patients_with_max_count

ND <- surv_long_data[surv_long_data$id==patientID,]
P1 <- predict(LM1, ND, id="id", horizon=max(surv_long_data$time_to_psa))$PredL # Make Linear prediction

## Start sampling

## Sampling done.

## Computing longitudinal predictions for individual 74 on PID: 31741

# To plot, should run from here
plot(P1$time_to_psa, P1$quant0.5, type="l", lwd=2, ylim=range(c(P1$quant0.025, P1$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P1$time_to_psa, P1$quant0.025, lty=2)
lines(P1$time_to_psa, P1$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19) # to here , pch=size of the dots

# use splines?
NSplines <- ns(surv_long_data$time_to_psa, knots=c(25)) # natural cubic splines , knots, the starting rise time, look at time to psa summary
f1 <- function(x) predict(NSplines, x)[,1] # first basis
f2 <- function(x) predict(NSplines, x)[,2] # second basis
# check splines
curve(f1, xlim=range(surv_long_data$time_to_psa), ylim=c(-1,1))
curve(f2, xlim=range(surv_long_data$time_to_psa), add=T)

# Second joint model for the longitudinal data, this time accounting for all
LM2 <- joint(formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), 
             dataLong=surv_long_data
            )
summary(LM2)

## Longitudinal outcome (gaussian)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Intercept                   1.2115 0.4835     0.2639   1.2115     2.1592
## f1time_to_psa              -2.4374 0.7851    -3.9762  -2.4374    -0.8986
## f2time_to_psa              -4.6243 1.8254    -8.2021  -4.6243    -1.0465
## grade_group2                0.7210 0.5707    -0.3976   0.7210     1.8396
## grade_group3               -0.9281 0.6808    -2.2624  -0.9281     0.4063
## grade_group4                1.4141 0.9685    -0.4842   1.4141     3.3124
## grade_group5               -0.4363 0.8244    -2.0521  -0.4363     1.1796
## mri1                       -0.1095 0.1365    -0.3770  -0.1095     0.1580
## class_age_at_bx6070         0.2621 0.1779    -0.0866   0.2621     0.6107
## class_age_at_bx70           0.5424 0.1857     0.1784   0.5424     0.9064
## after_txrt                  0.8043 0.2904     0.2351   0.8043     1.3734
## after_txunk                 0.1688 0.2218    -0.2660   0.1688     0.6036
## f1time_to_psa:grade_group2 -3.7616 1.0947    -5.9071  -3.7616    -1.6161
## f1time_to_psa:grade_group3 -1.3502 1.2783    -3.8557  -1.3502     1.1553
## f1time_to_psa:grade_group4 -2.3859 1.8599    -6.0312  -2.3859     1.2594
## f1time_to_psa:grade_group5  1.2357 1.5673    -1.8362   1.2357     4.3076
## f2time_to_psa:grade_group2 -3.1997 2.7304    -8.5512  -3.1997     2.1518
## f2time_to_psa:grade_group3 -5.2452 3.2391   -11.5937  -5.2452     1.1033
## f2time_to_psa:grade_group4  2.2777 4.3371    -6.2229   2.2777    10.7783
## f2time_to_psa:grade_group5  1.7360 4.3266    -6.7440   1.7360    10.2159
## Res. err. (variance)        0.4028 0.0158     0.3722   0.4027     0.4341
## 
## Random effects variance-covariance (L1)
##                                 mean      sd 0.025quant 0.5quant 0.975quant
## Intercept                     3.9518  3.5905     1.1649   2.9877    12.1458
## f1time_to_psa                13.2831  3.8321     7.6596  12.7082    22.4594
## f2time_to_psa               134.9731 28.0316    87.8352 132.3635   200.5550
## Intercept:f1time_to_psa       0.4751  3.2939    -7.6999   1.2702     3.7994
## Intercept:f2time_to_psa      18.1205  7.5477     8.1619  16.7820    36.4451
## f1time_to_psa:f2time_to_psa  26.7543  9.9779     5.2259  27.0630    45.1522
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -2108.808                             -2102.088 
## 
## Deviance Information Criterion:  3487.443
## Widely applicable Bayesian information criterion:  3555.614
## Computation time: 2.16 seconds

# Again, pick one patient (here the same)
ND2 <- surv_long_data[surv_long_data$id==patientID,] # observed vs. fitted for a couple individuals.
P2 <- predict(LM2, ND, id="id", horizon=max(surv_long_data$time_to_psa))$PredL

## Start sampling

## Sampling done.

## Computing longitudinal predictions for individual 74 on PID: 31741

plot(P2$time_to_psa, P2$quant0.5, type="l", lwd=2, ylim=range(c(P2$quant0.025, P2$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P2$time_to_psa, P2$quant0.025, lty=2)
lines(P2$time_to_psa, P2$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19)

#
inla.setOption(num.threads='8:1')
# Third joint model, combinning 2 survival modals and the longitudinal one 
LJM1 <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx  + (1|id),
                          inla.surv(drfs_time, drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx),
            formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
            id="id",
            timeVar="time_to_psa",
            basRisk=c("rw2", "rw2"), # random walk
            assocSurv=TRUE,
            family="gaussian", 
            assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
            control=list(int.strategy="eb"), 
            dataSurv=list(surv_nolong_data), 
            dataLong=surv_long_data
            )
summary(LJM1)

## Longitudinal outcome (gaussian)
##                                  mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1                   1.2444 0.4767     0.3102   1.2444     2.1787
## f1time_to_psa_L1              -2.4534 0.7825    -3.9871  -2.4534    -0.9197
## f2time_to_psa_L1              -4.5632 1.7988    -8.0887  -4.5632    -1.0376
## grade_group2_L1                0.6752 0.5605    -0.4233   0.6752     1.7738
## grade_group3_L1               -0.9331 0.6703    -2.2469  -0.9331     0.3808
## grade_group4_L1                1.3646 0.9564    -0.5098   1.3646     3.2390
## grade_group5_L1               -0.4683 0.8111    -2.0581  -0.4683     1.1214
## mri1_L1                       -0.1331 0.1349    -0.3975  -0.1331     0.1312
## class_age_at_bx6070_L1         0.2623 0.1773    -0.0852   0.2623     0.6098
## class_age_at_bx70_L1           0.5376 0.1848     0.1755   0.5376     0.8997
## after_txrt_L1                  0.8004 0.2872     0.2375   0.8004     1.3633
## after_txunk_L1                 0.1612 0.2194    -0.2687   0.1612     0.5912
## f1time_to_psa:grade_group2_L1 -3.6727 1.0906    -5.8103  -3.6727    -1.5352
## f1time_to_psa:grade_group3_L1 -1.3250 1.2754    -3.8248  -1.3250     1.1748
## f1time_to_psa:grade_group4_L1 -2.2551 1.8558    -5.8924  -2.2551     1.3822
## f1time_to_psa:grade_group5_L1  1.2983 1.5668    -1.7725   1.2983     4.3691
## f2time_to_psa:grade_group2_L1 -3.2060 2.6914    -8.4810  -3.2060     2.0691
## f2time_to_psa:grade_group3_L1 -5.2278 3.1932   -11.4865  -5.2278     1.0308
## f2time_to_psa:grade_group4_L1  2.3189 4.2829    -6.0754   2.3189    10.7131
## f2time_to_psa:grade_group5_L1  1.6850 4.2736    -6.6911   1.6850    10.0610
## Res. err. (variance)           0.4032 0.0157     0.3735   0.4028     0.4352
## 
## Random effects variance-covariance (L1)
##                                       mean      sd 0.025quant 0.5quant
## Intercept_L1                        3.4037  2.4991     1.1437   2.8060
## f1time_to_psa_L1                   13.0074  3.6435     7.5592  12.5538
## f2time_to_psa_L1                  127.2802 23.6713    85.3376 125.8892
## Intercept_L1:f1time_to_psa_L1       0.5998  2.4390    -4.7301   1.1661
## Intercept_L1:f2time_to_psa_L1      16.6927  6.0326     7.9048  15.8057
## f1time_to_psa_L1:f2time_to_psa_L1  25.8435  8.4248     8.7672  25.9323
##                                   0.975quant
## Intercept_L1                          8.9143
## f1time_to_psa_L1                     21.6029
## f2time_to_psa_L1                    177.4183
## Intercept_L1:f1time_to_psa_L1         3.0623
## Intercept_L1:f2time_to_psa_L1        31.2052
## f1time_to_psa_L1:f2time_to_psa_L1    42.6080
## 
## Survival outcome (S1)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1  0.0556 0.0245     0.0229   0.0505     0.1180
## grade_group2_S1              3.1912 0.8173     1.5894   3.1912     4.7931
## grade_group3_S1              2.9709 0.9976     1.0157   2.9709     4.9261
## grade_group4_S1              2.0540 1.4025    -0.6948   2.0540     4.8029
## grade_group5_S1              3.4948 1.3242     0.8994   3.4948     6.0902
## mri1_S1                      1.5195 0.8084    -0.0649   1.5195     3.1039
## class_age_at_bx6070_S1       0.2104 0.9087    -1.5706   0.2104     1.9914
## class_age_at_bx70_S1         0.6940 0.9793    -1.2253   0.6940     2.6134
## after_txrt_S1               -1.1795 1.5389    -4.1957  -1.1795     1.8367
## after_txunk_S1              -1.3235 1.1039    -3.4870  -1.3235     0.8400
## 
## Frailty term variance (S1)
##                   mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 11.6985 2.0526     7.9699  11.6206     15.993
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.0521 0.0218     0.0198   0.0489     0.1043
## grade_group2_S2              3.2262 0.8534     1.5535   3.2262     4.8989
## grade_group3_S2              3.1058 1.0443     1.0590   3.1058     5.1526
## grade_group4_S2              1.9393 1.4725    -0.9467   1.9393     4.8253
## grade_group5_S2              3.5001 1.3827     0.7900   3.5001     6.2101
## mri1_S2                      1.9643 0.8487     0.3008   1.9643     3.6278
## class_age_at_bx6070_S2       0.1497 0.9523    -1.7168   0.1497     2.0163
## class_age_at_bx70_S2         0.5012 1.0269    -1.5114   0.5012     2.5139
## after_txrt_S2               -0.9977 1.5937    -4.1214  -0.9977     2.1260
## after_txunk_S2               1.0270 1.1496    -1.2262   1.0270     3.2802
## 
## Association longitudinal - survival
##             mean     sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.0365 0.1443    -0.3259  -0.0347     0.2422
## CV_L1_S2  0.1891 0.1719    -0.1486   0.1888     0.5282
## 
## Association survival - survival
##                    mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.043 0.0926      0.864   1.0419     1.2286
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -5532.715                             -5520.481 
## 
## Deviance Information Criterion:  -894.2573
## Widely applicable Bayesian information criterion:  -1032.216
## Computation time: 6.85 seconds

Modelling : Only positive (>=0) time to PSAs

surv_long_ppsa_data <- surv_long_data[surv_long_data$time_to_psa >= 0, ]
# First joint model for longitudinal marker PSA
LM1_v2 <- joint(formLong=log_psa_results ~ time_to_psa + (1 + time_to_psa | id), 
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), # To make calculation faster, use only the mean for the hyper-parameter distributions
             dataLong=surv_long_ppsa_data
             )
summary(LM1_v2)

## Longitudinal outcome (gaussian)
##                         mean     sd 0.025quant 0.5quant 0.975quant
## Intercept             1.1826 0.0920     1.0023   1.1826     1.3628
## time_to_psa          -0.0156 0.0094    -0.0340  -0.0156     0.0027
## Res. err. (variance)  0.3320 0.0137     0.3060   0.3317     0.3599
## 
## Random effects variance-covariance (L1)
##                          mean     sd 0.025quant 0.5quant 0.975quant
## Intercept              0.8729 0.1304     0.6507   0.8636     1.1581
## time_to_psa            0.0098 0.0013     0.0075   0.0097     0.0124
## Intercept:time_to_psa -0.0121 0.0099    -0.0326  -0.0118     0.0067
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -1692.643                             -1688.679 
## 
## Deviance Information Criterion:  2434.868
## Widely applicable Bayesian information criterion:  2655.689
## Computation time: 1.38 seconds

# 643:42 , 1030:41 , 321:37 , 553:37 ,  938:36 ,  444:36 , 405:33 -> these are some patients with large enough data
patientID = 25 # pick one
max_psa_patient <- surv_long_ppsa_data$id[which.max(surv_long_ppsa_data$PSA_results)]
patient_counts <- table(surv_long_ppsa_data$id)
max_count <- max(patient_counts)
patients_with_max_count <- names(patient_counts[patient_counts == max_count])
patientID = patients_with_max_count

ND <- surv_long_ppsa_data[surv_long_ppsa_data$id==patientID,]
P1 <- predict(LM1_v2, ND, id="id", horizon=max(surv_long_ppsa_data$time_to_psa))$PredL # Make Linear prediction

## Start sampling

## Sampling done.

## Computing longitudinal predictions for individual 74 on PID: 31741

# To plot, should run from here
plot(P1$time_to_psa, P1$quant0.5, type="l", lwd=2, ylim=range(c(P1$quant0.025, P1$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P1$time_to_psa, P1$quant0.025, lty=2)
lines(P1$time_to_psa, P1$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19) # to here , pch=size of the dots

# use splines?
NSplines <- ns(surv_long_ppsa_data$time_to_psa, knots=c(25)) # natural cubic splines , knots, the starting rise time, look at time to psa summary
f1 <- function(x) predict(NSplines, x)[,1] # first basis
f2 <- function(x) predict(NSplines, x)[,2] # second basis
# check splines
curve(f1, xlim=range(surv_long_ppsa_data$time_to_psa), ylim=c(-1,1))
curve(f2, xlim=range(surv_long_ppsa_data$time_to_psa), add=T)

# Second joint model for the longitudinal data, this time accounting for all
LM2_v2 <- joint(formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
             timeVar="time_to_psa",
             id="id", 
             family="gaussian", 
             control=list(int.strategy="eb"), 
             dataLong=surv_long_ppsa_data
            )
summary(LM2_v2)

## Longitudinal outcome (gaussian)
##                               mean     sd 0.025quant 0.5quant 0.975quant
## Intercept                   0.9918 0.3354     0.3343   0.9918     1.6492
## f1time_to_psa              -1.7051 0.5043    -2.6936  -1.7051    -0.7167
## f2time_to_psa               0.5531 0.3341    -0.1017   0.5531     1.2078
## grade_group2               -0.6071 0.2219    -1.0421  -0.6071    -0.1722
## grade_group3               -0.6836 0.2681    -1.2090  -0.6836    -0.1582
## grade_group4               -0.9504 0.3801    -1.6954  -0.9504    -0.2055
## grade_group5               -0.8410 0.3571    -1.5409  -0.8410    -0.1410
## mri1                        0.0110 0.1833    -0.3482   0.0110     0.3702
## class_age_at_bx6070         0.1511 0.2106    -0.2617   0.1511     0.5639
## class_age_at_bx70           0.6134 0.2266     0.1694   0.6134     1.0574
## after_txrt                  1.3934 0.3777     0.6532   1.3934     2.1336
## after_txunk                 0.4360 0.2934    -0.1391   0.4360     1.0111
## f1time_to_psa:grade_group2  0.4454 0.7389    -1.0028   0.4454     1.8935
## f1time_to_psa:grade_group3  0.4420 0.8772    -1.2772   0.4420     2.1612
## f1time_to_psa:grade_group4  1.4517 1.2481    -0.9946   1.4517     3.8979
## f1time_to_psa:grade_group5  3.1269 1.4766     0.2329   3.1269     6.0209
## f2time_to_psa:grade_group2  2.3665 0.5431     1.3021   2.3665     3.4309
## f2time_to_psa:grade_group3  1.9749 0.5474     0.9019   1.9749     3.0478
## f2time_to_psa:grade_group4 -0.8102 0.6849    -2.1527  -0.8102     0.5322
## f2time_to_psa:grade_group5  0.7821 2.2827    -3.6918   0.7821     5.2561
## Res. err. (variance)        0.3161 0.0133     0.2909   0.3158     0.3432
## 
## Random effects variance-covariance (L1)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Intercept                    0.9433 0.4533     0.5340   0.8485     1.8545
## f1time_to_psa                8.0354 3.7853     3.3455   7.4476    16.9789
## f2time_to_psa                1.1070 0.9675     0.1861   0.8312     3.6266
## Intercept:f1time_to_psa     -1.0611 1.0419    -3.2209  -0.9695     0.5956
## Intercept:f2time_to_psa      0.5666 0.5237    -0.0651   0.4643     1.6237
## f1time_to_psa:f2time_to_psa -2.0608 1.3205    -5.1128  -1.8495    -0.0936
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -1554.261                             -1547.541 
## 
## Deviance Information Criterion:  2392.827
## Widely applicable Bayesian information criterion:  2566.361
## Computation time: 2.11 seconds

# Again, pick one patient (here the same)
ND2 <- surv_long_ppsa_data[surv_long_ppsa_data$id==patientID,] # observed vs. fitted for a couple individuals.
P2 <- predict(LM2_v2, ND, id="id", horizon=max(surv_long_ppsa_data$time_to_psa))$PredL

## Start sampling

## Sampling done.

## Computing longitudinal predictions for individual 74 on PID: 31741

plot(P2$time_to_psa, P2$quant0.5, type="l", lwd=2, ylim=range(c(P2$quant0.025, P2$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P2$time_to_psa, P2$quant0.025, lty=2)
lines(P2$time_to_psa, P2$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19)

# Third joint model, combinning 2 survival modals and the longitudinal one 
LJM1_v2 <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx  + (1|id),
                          inla.surv(drfs_time, drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx),
            formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
            id="id",
            timeVar="time_to_psa",
            basRisk=c("rw2", "rw2"), # random walk
            assocSurv=TRUE,
            family="gaussian", 
            assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
            control=list(int.strategy="eb"), 
            dataSurv=list(surv_nolong_data[which(surv_nolong_data$id %in% surv_long_ppsa_data$id),]), 
            dataLong=surv_long_ppsa_data
            )

## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ grade_group
## + : The hyperparameters skewness correction seems abnormal, this can be a sign
## of an ill-defined model and/or issues with the fit.

## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ grade_group
## + : Internal correlation between hyperparameters is abnormally high, this is a
## sign of identifiability issues / ill-defined model.

summary(LJM1_v2)

## Longitudinal outcome (gaussian)
##                                  mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1                   0.9918 0.3350     0.3353   0.9918     1.6483
## f1time_to_psa_L1              -1.7289 0.5075    -2.7235  -1.7289    -0.7343
## f2time_to_psa_L1               0.5008 0.3293    -0.1446   0.5008     1.1461
## grade_group2_L1               -0.6026 0.2214    -1.0365  -0.6026    -0.1687
## grade_group3_L1               -0.6836 0.2674    -1.2077  -0.6836    -0.1594
## grade_group4_L1               -0.9430 0.3792    -1.6862  -0.9430    -0.1999
## grade_group5_L1               -0.8385 0.3564    -1.5370  -0.8385    -0.1401
## mri1_L1                        0.0130 0.1830    -0.3456   0.0130     0.3717
## class_age_at_bx6070_L1         0.1520 0.2104    -0.2604   0.1520     0.5644
## class_age_at_bx70_L1           0.6159 0.2263     0.1724   0.6159     1.0595
## after_txrt_L1                  1.3923 0.3774     0.6527   1.3923     2.1319
## after_txunk_L1                 0.4313 0.2930    -0.1431   0.4313     1.0056
## f1time_to_psa:grade_group2_L1  0.4429 0.7436    -1.0144   0.4429     1.9003
## f1time_to_psa:grade_group3_L1  0.4309 0.8828    -1.2994   0.4309     2.1612
## f1time_to_psa:grade_group4_L1  1.4457 1.2564    -1.0169   1.4457     3.9083
## f1time_to_psa:grade_group5_L1  3.1416 1.4851     0.2307   3.1416     6.0524
## f2time_to_psa:grade_group2_L1  2.4056 0.5360     1.3551   2.4056     3.4561
## f2time_to_psa:grade_group3_L1  1.9442 0.5374     0.8908   1.9442     2.9975
## f2time_to_psa:grade_group4_L1 -0.7062 0.6706    -2.0206  -0.7062     0.6081
## f2time_to_psa:grade_group5_L1  0.8545 2.2890    -3.6318   0.8545     5.3407
## Res. err. (variance)           0.3161 0.0134     0.2909   0.3157     0.3437
## 
## Random effects variance-covariance (L1)
##                                           mean           sd 0.025quant 0.5quant
## Intercept_L1                         21877.233     368288.5     0.6297   9.0712
## f1time_to_psa_L1                  43925604.510 1520625231.5     0.0427   8.3465
## f2time_to_psa_L1                     11684.163     435821.5     0.0001   0.5875
## Intercept_L1:f1time_to_psa_L1       468908.831   14173708.5 -3121.9990  -1.2575
## Intercept_L1:f2time_to_psa_L1         5490.018     115292.2   -46.2756   0.3083
## f1time_to_psa_L1:f2time_to_psa_L1   104633.027    2110109.5   -42.7303  -0.0440
##                                    0.975quant
## Intercept_L1                        44647.276
## f1time_to_psa_L1                  3049344.962
## f2time_to_psa_L1                     5644.743
## Intercept_L1:f1time_to_psa_L1      185387.372
## Intercept_L1:f2time_to_psa_L1        9853.974
## f1time_to_psa_L1:f2time_to_psa_L1   81180.934
## 
## Survival outcome (S1)
##                                mean       sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 14.8147 178.1015     0.0000   0.0087    55.7707
## grade_group2_S1              2.9313   0.7781     1.4063   2.9313     4.4564
## grade_group3_S1              2.7160   0.9494     0.8552   2.7160     4.5768
## grade_group4_S1              1.8039   1.3340    -0.8107   1.8039     4.4186
## grade_group5_S1              3.2865   1.2613     0.8144   3.2865     5.7587
## mri1_S1                      1.4565   0.7689    -0.0506   1.4565     2.9635
## class_age_at_bx6070_S1       0.2248   0.8650    -1.4705   0.2248     1.9200
## class_age_at_bx70_S1         0.7894   0.9320    -1.0373   0.7894     2.6160
## after_txrt_S1               -0.7927   1.4678    -3.6695  -0.7927     2.0841
## after_txunk_S1              -1.1563   1.0562    -3.2263  -1.1563     0.9138
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 11.817 6.9784     3.1772  10.2328    29.9483
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.3268 0.8045     0.0049   0.0955     2.2095
## grade_group2_S2              3.1092 0.8563     1.4309   3.1092     4.7875
## grade_group3_S2              3.0022 1.0478     0.9487   3.0022     5.0558
## grade_group4_S2              1.8863 1.4775    -1.0095   1.8863     4.7821
## grade_group5_S2              3.5006 1.3886     0.7790   3.5006     6.2222
## mri1_S2                      1.9778 0.8510     0.3098   1.9778     3.6458
## class_age_at_bx6070_S2       0.2044 0.9557    -1.6688   0.2044     2.0777
## class_age_at_bx70_S2         0.6324 1.0307    -1.3877   0.6324     2.6526
## after_txrt_S2               -0.5736 1.6023    -3.7140  -0.5736     2.5667
## after_txunk_S2               1.2060 1.1589    -1.0654   1.2060     3.4774
## 
## Association longitudinal - survival
##             mean     sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.1200 1.8057    -3.5926  -0.1475     3.5162
## CV_L1_S2  0.0141 1.4056    -2.6902  -0.0070     2.8434
## 
## Association survival - survival
##                     mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.1452 0.4414     0.3014   1.1368      2.039
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -4971.973                             -4959.739 
## 
## Deviance Information Criterion:  -1988.515
## Widely applicable Bayesian information criterion:  -2024.257
## Computation time: 6.67 seconds

ALL T2PSA By GG

gg <- 2
id_rle <- rle(surv_long_data$id)
new_id <- rep(seq_along(id_rle$lengths), id_rle$lengths)

# -> Reassign the new IDs to the id column in surv_long_data
longdata <- surv_long_data
longdata$id <- new_id

# Surv data to be used in the latter join model 
surdata <- longdata[!duplicated(longdata$id), ]

  
# Third joint model, combinning 2 survival modals and the longitudinal one 
LJM1_tmp <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ mri+class_age_at_bx+after_tx  + (1|id),
                          inla.surv(drfs_time, drfs_status) ~ mri+class_age_at_bx+after_tx),
            formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
            id="id",
            timeVar="time_to_psa",
            basRisk=c("rw2", "rw2"), # random walk
            assocSurv=TRUE,
            family="gaussian", 
            assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
            control=list(int.strategy="eb"), 
            dataSurv=list(surdata[(surdata$id %in% longdata$id) & (surdata$grade_group == gg),]), 
            dataLong=longdata[longdata$grade_group == gg,]
            )

## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ mri + : Max
## id is 117 but there are only 39 individuals with longitudinal records, I'm
## reassigning id from 1 to 39

cat("Current GG is ", gg)

## Current GG is  2

summary(LJM1_tmp)

## Longitudinal outcome (gaussian)
##                            mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1             0.7770 0.2469     0.2931   0.7770     1.2609
## f1time_to_psa_L1        -0.7843 0.2614    -1.2966  -0.7843    -0.2720
## f2time_to_psa_L1         2.1851 0.4053     1.3906   2.1851     2.9795
## mri1_L1                  0.2193 0.1880    -0.1492   0.2193     0.5878
## class_age_at_bx6070_L1  -0.1058 0.2059    -0.5093  -0.1058     0.2978
## class_age_at_bx70_L1     0.2007 0.2377    -0.2651   0.2007     0.6666
## after_txrt_L1            0.9697 0.4221     0.1423   0.9697     1.7971
## after_txunk_L1           0.2544 0.2542    -0.2437   0.2544     0.7526
## f1time_to_psa:mri1_L1   -4.6636 4.0228   -12.5480  -4.6636     3.2209
## f2time_to_psa:mri1_L1  -10.8246 7.7175   -25.9506 -10.8246     4.3014
## Res. err. (variance)     0.4590 0.0314     0.4005   0.4579     0.5239
## 
## Random effects variance-covariance (L1)
##                                      mean     sd 0.025quant 0.5quant 0.975quant
## Intercept_L1                       0.2262 0.1253     0.1040   0.1986     0.5259
## f1time_to_psa_L1                   0.9705 1.4628     0.1714   0.6297     4.1348
## f2time_to_psa_L1                   0.1859 0.1518     0.0338   0.1424     0.5886
## Intercept_L1:f1time_to_psa_L1     -0.0120 0.3152    -0.5829   0.0194     0.3672
## Intercept_L1:f2time_to_psa_L1      0.0155 0.1170    -0.2132   0.0081     0.2693
## f1time_to_psa_L1:f2time_to_psa_L1  0.1382 0.3983    -0.2008   0.0224     1.1353
## 
## Survival outcome (S1)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1  0.0284 0.0554     0.0007   0.0110     0.1709
## mri1_S1                      0.4376 0.8389    -1.2066   0.4376     2.0817
## class_age_at_bx6070_S1      -0.3112 0.8938    -2.0630  -0.3112     1.4407
## class_age_at_bx70_S1        -0.7998 1.0489    -2.8557  -0.7998     1.2561
## after_txrt_S1               -1.9794 1.8251    -5.5565  -1.9794     1.5977
## after_txunk_S1              -0.2459 1.0790    -2.3606  -0.2459     1.8688
## 
## Frailty term variance (S1)
##                  mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 4.3621 1.9741      1.661   3.9819      9.316
## 
## Survival outcome (S2)
##                                mean     sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2  0.0504 0.0811     0.0023   0.0242     0.2683
## mri1_S2                      0.4830 0.9986    -1.4742   0.4830     2.4403
## class_age_at_bx6070_S2      -0.4422 1.0539    -2.5078  -0.4422     1.6233
## class_age_at_bx70_S2        -1.0144 1.2368    -3.4384  -1.0144     1.4097
## after_txrt_S2               -2.4696 2.1249    -6.6343  -2.4696     1.6950
## after_txunk_S2               2.0236 1.2601    -0.4462   2.0236     4.4934
## 
## Association longitudinal - survival
##             mean     sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.6431 0.8051    -2.2047  -0.6511     0.9655
## CV_L1_S2 -0.1483 0.8807    -1.8652  -0.1541     1.6025
## 
## Association survival - survival
##                     mean     sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.1861 0.1991     0.8014   1.1836     1.5852
## 
## log marginal-likelihood (integration)    log marginal-likelihood (Gaussian) 
##                             -1700.249                             -1688.015 
## 
## Deviance Information Criterion:  -30.77604
## Widely applicable Bayesian information criterion:  -88.97328
## Computation time: 2.96 seconds

ONLY +T2PSA By GG for (gg in c(‘1’,‘2’,‘3’,‘4’,‘5’)) {

Third joint model, combinning 2 survival modals and the longitudinal one LJM2_tmp <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ mri+class_age_at_bx+after_tx + (1|id), inla.surv(drfs_time, drfs_status) ~ mri+class_age_at_bx+after_tx), formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id), id=“id”, timeVar=“time_to_psa”, basRisk=c(“rw2”, “rw2”), # random walk assocSurv=TRUE, family=“gaussian”, assoc=c(“CV”, “CV”), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t control=list(int.strategy=“eb”), dataSurv=list(surv_nolong_data[(surv_nolong_data\(id %in% surv_long_ppsa_data\)id) & (surv_nolong_data\(grade_group == gg),]), dataLong=surv_long_ppsa_data[surv_long_ppsa_data\)grade_group == gg,] ) cat(“Current GG is”, gg)

summary(LJM2_tmp)

}

desc_stat_mod

Batouche A.O

2024-02-11