# No packages to install, so far
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
abx <- read_csv("datafolder/bx.csv", col_select = -...1)
## New names:
## Rows: 12277 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (5): id, grade_group, mri, time_to_mri, age_at_bx dttm (3): bx_date, mri_date,
## last_data_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
apsa <- read_csv("datafolder/psa.csv", col_select = -...1)
## New names:
## Rows: 194195 Columns: 4
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," dbl
## (3): id, psa_results, time_to_psa dttm (1): psa_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
abcr <- read_csv("datafolder/bcr.csv", col_select = -...1)
## New names:
## Rows: 2684 Columns: 19
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): after_tx dbl (16): id, grade_group, mri, time_to_tx, tx_to_bcr, tx_to_end,
## time_to_b... dttm (2): tx_date, bcr_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
atfs <- read_csv("datafolder/tfs.csv", col_select = -...1)
## New names:
## Rows: 12277 Columns: 9
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): type_of_treatment dbl (7): id, grade_group, mri, tfs_status, tfs_time,
## tfs5_status, tfs5_time dttm (1): treatment_date
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
summary(abx)
## id bx_date grade_group
## Min. : 1 Min. :1993-09-09 00:00:00.00 Min. :1.000
## 1st Qu.: 3129 1st Qu.:2008-11-14 00:00:00.00 1st Qu.:1.000
## Median : 6265 Median :2012-11-27 00:00:00.00 Median :2.000
## Mean : 6269 Mean :2012-02-01 22:53:43.77 Mean :2.495
## 3rd Qu.: 9409 3rd Qu.:2016-03-31 00:00:00.00 3rd Qu.:3.000
## Max. :12545 Max. :2019-06-13 00:00:00.00 Max. :5.000
##
## mri_date mri time_to_mri
## Min. :1999-08-10 00:00:00.00 Min. :0.0000 Min. :-12.000
## 1st Qu.:2016-08-03 00:00:00.00 1st Qu.:0.0000 1st Qu.: -1.000
## Median :2017-09-12 00:00:00.00 Median :0.0000 Median : -1.000
## Mean :2017-05-01 01:55:21.68 Mean :0.1454 Mean : -1.168
## 3rd Qu.:2018-08-09 00:00:00.00 3rd Qu.:0.0000 3rd Qu.: -1.000
## Max. :2019-06-07 00:00:00.00 Max. :1.0000 Max. : 28.000
## NA's :10492 NA's :10492
## age_at_bx last_data_date
## Min. :37.00 Min. :1994-11-07 00:00:00.00
## 1st Qu.:63.00 1st Qu.:2017-02-20 13:37:00.00
## Median :68.00 Median :2020-09-14 09:12:00.00
## Mean :67.71 Mean :2018-05-02 13:55:13.81
## 3rd Qu.:73.00 3rd Qu.:2021-01-20 11:19:00.00
## Max. :80.00 Max. :2021-05-05 00:00:00.00
##
# Bar plot for grade_group
barplot(table(abx$grade_group), main = "Distribution of grade_group", xlab = "Grade Group", ylab = "Count")
# Box plot for time_to_mri
boxplot(abx$time_to_mri, main = "Distribution of time_to_mri", ylab = "Time to MRI")
# Histogram for age_at_bx
hist(abx$age_at_bx, main = "Distribution of Age at Biopsy", xlab = "Age", ylab = "Frequency")
# Histogram for mri
hist(abx$mri, main = "Distribution of MRI", xlab = "MRI", ylab = "Frequency")
# Convert datetime variables to Date format
abx$bx_date <- as.Date(abx$bx_date)
abx$mri_date <- as.Date(abx$mri_date)
# Plot bx_date
ggplot(abx, aes(x = bx_date)) +
geom_histogram(binwidth = 30, color = "black", fill = "blue") +
labs(title = "Distribution of bx_date", x = "Date", y = "Frequency")
# Plot mri_date
ggplot(abx, aes(x = mri_date)) +
geom_histogram(binwidth = 30, color = "black", fill = "green") +
labs(title = "Distribution of mri_date", x = "Date", y = "Frequency")
## Warning: Removed 10492 rows containing non-finite values (`stat_bin()`).
summary(apsa)
## id psa_results psa_date
## Min. : 1 Min. : 0.00 Min. :1998-05-26 07:01:00.00
## 1st Qu.: 3031 1st Qu.: 0.08 1st Qu.:2011-01-03 11:56:00.00
## Median : 6098 Median : 1.10 Median :2015-01-21 10:17:00.00
## Mean : 6096 Mean : 14.77 Mean :2014-04-03 21:36:56.91
## 3rd Qu.: 9119 3rd Qu.: 5.80 3rd Qu.:2018-03-13 07:45:00.00
## Max. :12227 Max. :76800.00 Max. :2021-04-08 08:09:00.00
## time_to_psa
## Min. :-244.00
## 1st Qu.: 4.00
## Median : 25.00
## Mean : 30.67
## 3rd Qu.: 60.00
## Max. : 284.00
# Convert datetime variable to POSIXct format
apsa$psa_date <- as.POSIXct(apsa$psa_date)
# Plot frequency of psa_date
apsa$year <- format(apsa$psa_date, "%Y")
hist(as.numeric(apsa$year), main = "Frequency of PSA tests", xlab = "Date", ylab = "Frequency", breaks = 30)
# Apply log transformation to psa_results
apsa$log_psa_results <- log(apsa$psa_results + 1)
hist(as.numeric(apsa$log_psa_results)[apsa$time_to_psa==0], main = "Dist LOG PSA at baseline (ref)", xlab = "log(PSA+1)", ylab = "Frequency", breaks = 30)
# Calculate the average log_psa_result
average_log_psa <- mean(apsa$log_psa_results[apsa$time_to_psa == 0])
# Center the log_psa_result around its average
centered_log_psa <- apsa$log_psa_results - average_log_psa
# Plot the histogram
hist(centered_log_psa, main = "Centered Dist LOG PSA at baseline", xlab = "log(PSA+1) - avg", ylab = "Frequency", breaks = 30)
hist(as.numeric(apsa$psa_results)[apsa$time_to_psa==0 & apsa$psa_results<100], main = "Dist PSA at baseline", xlab = "PSA < 100 at baseline", ylab = "Frequency", breaks = 10)
# Plot histogram for Time to PSA
hist(apsa$time_to_psa, main = "Distribution of Time to PSA", xlab = "Time to PSA", ylab = "Frequency")
# Plot histogram for + Time to PSA
hist(apsa$time_to_psa[apsa$time_to_psa>=0], main = "Distribution of +Time to PSA", xlab = "+Time to PSA", ylab = "Frequency")
# Plot histogram for + Time to PSA
hist(log(apsa$time_to_psa+1)[apsa$time_to_psa>=0], main = "Distribution of LOG +Time to PSA", xlab = "+Time to PSA", ylab = "Frequency")
## Warning in log(apsa$time_to_psa + 1): NaNs produced
# Plot log(psa_results + 0.1) against time_to_psa as spaghetti plot
matplot(apsa$time_to_psa, apsa$log_psa_results, type = "l", main = "Spaghetti Plot of Log Transformed PSA Results Over Time", xlab = "Time to psa", ylab = "Log(PSA + 0.1)", col = 1:nrow(apsa), lty = 1)
# Plot log(psa_results + 0.1) against time_to_psa>0 as spaghetti plot
pos_apsa <- apsa[apsa$time_to_psa>0,]
matplot(pos_apsa$time_to_psa, pos_apsa$log_psa_results, type = "l", main = "Spaghetti Plot of Log Post-Bx Transformed PSA Results Over Time", xlab = "Time to psa", ylab = "Log(PSA + 0.1)", col = 1:nrow(pos_apsa), lty = 1)
# ggplot2, nicer plots
meta_apsa <- merge(pos_apsa,
abx[, c('id', 'grade_group', 'mri','age_at_bx')], on='id', all=TRUE)
p <- ggplot(data = meta_apsa, aes(x = time_to_psa, y = log_psa_results, group = id, colour=grade_group))
# all
p +
geom_line() +
stat_smooth(aes(group = 1), color='red') +
stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3)
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 458 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 458 rows containing missing values (`geom_line()`).
# By GG
p +
geom_line() +
stat_smooth(aes(group = 1), color='red') +
#stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3, color='yellow')+
facet_grid(. ~ grade_group)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Removed 458 rows containing missing values (`geom_line()`).
p +
geom_line() +
stat_smooth(aes(group = 1), color='red') +
#stat_summary(aes(group = 1), geom = "point", fun.y = mean, shape = 17, size = 3, color='yellow')+
facet_grid(. ~ mri)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 458 rows containing non-finite values (`stat_smooth()`).
## Removed 458 rows containing missing values (`geom_line()`).
summary(abcr)
## id grade_group mri after_tx
## Min. : 2 Min. :1.000 Min. :0.0000 Length:2684
## 1st Qu.: 2921 1st Qu.:2.000 1st Qu.:0.0000 Class :character
## Median : 6002 Median :3.000 Median :0.0000 Mode :character
## Mean : 5989 Mean :2.865 Mean :0.1073
## 3rd Qu.: 8993 3rd Qu.:4.000 3rd Qu.:0.0000
## Max. :12224 Max. :5.000 Max. :1.0000
## tx_date time_to_tx tx_to_bcr
## Min. :1999-09-06 08:44:00.00 Min. : 0.00 Min. : 0.00
## 1st Qu.:2009-09-09 18:00:00.00 1st Qu.: 3.00 1st Qu.: 16.00
## Median :2012-11-30 00:00:00.00 Median : 4.00 Median : 34.00
## Mean :2012-07-06 19:29:22.77 Mean : 11.91 Mean : 42.72
## 3rd Qu.:2015-11-16 06:00:00.00 3rd Qu.: 7.00 3rd Qu.: 60.00
## Max. :2020-04-16 00:00:00.00 Max. :177.00 Max. :194.00
## tx_to_end bcr_date time_to_bcr
## Min. : 2.00 Min. :2000-07-24 08:00:00.00 Min. : 2.00
## 1st Qu.: 41.00 1st Qu.:2012-12-18 07:43:30.00 1st Qu.: 24.00
## Median : 69.00 Median :2016-01-17 00:00:00.00 Median : 44.00
## Mean : 75.92 Mean :2015-06-15 20:45:10.81 Mean : 54.63
## 3rd Qu.:107.00 3rd Qu.:2018-07-30 20:15:45.00 3rd Qu.: 74.00
## Max. :238.00 Max. :2021-04-07 11:10:00.00 Max. :226.00
## time_to_end rfs_status rfs_time drfs_status drfs_time
## Min. : 3.00 Min. :1 Min. : 0.00 Min. :1 Min. : 2.00
## 1st Qu.: 50.00 1st Qu.:1 1st Qu.: 16.00 1st Qu.:1 1st Qu.: 24.00
## Median : 80.00 Median :1 Median : 34.00 Median :1 Median : 44.00
## Mean : 87.83 Mean :1 Mean : 42.72 Mean :1 Mean : 54.63
## 3rd Qu.:120.00 3rd Qu.:1 3rd Qu.: 60.00 3rd Qu.:1 3rd Qu.: 74.00
## Max. :265.00 Max. :1 Max. :194.00 Max. :1 Max. :226.00
## rfs5_status rfs5_time drfs5_status drfs5_time
## Min. :0.0000 Min. : 0.00 Min. :0.0000 Min. : 2.00
## 1st Qu.:1.0000 1st Qu.:16.00 1st Qu.:0.0000 1st Qu.:24.00
## Median :1.0000 Median :34.00 Median :1.0000 Median :44.00
## Mean :0.7511 Mean :34.78 Mean :0.6505 Mean :41.06
## 3rd Qu.:1.0000 3rd Qu.:60.00 3rd Qu.:1.0000 3rd Qu.:60.00
## Max. :1.0000 Max. :60.00 Max. :1.0000 Max. :60.00
barplot(table(abcr$grade_group), main = "Distribution of grade_group", xlab = "Grade Group", ylab = "Count")
table(abcr$grade_group)
##
## 1 2 3 4 5
## 606 555 638 365 520
summary(atfs)
## id grade_group mri
## Min. : 1 Min. :1.000 Min. :0.0000
## 1st Qu.: 3129 1st Qu.:1.000 1st Qu.:0.0000
## Median : 6265 Median :2.000 Median :0.0000
## Mean : 6269 Mean :2.495 Mean :0.1454
## 3rd Qu.: 9409 3rd Qu.:3.000 3rd Qu.:0.0000
## Max. :12545 Max. :5.000 Max. :1.0000
##
## treatment_date type_of_treatment tfs_status
## Min. :1999-06-07 11:43:00.00 Length:12277 Min. :0.0000
## 1st Qu.:2010-10-26 00:00:00.00 Class :character 1st Qu.:1.0000
## Median :2014-02-20 00:00:00.00 Mode :character Median :1.0000
## Mean :2013-08-09 03:45:34.11 Mean :0.7764
## 3rd Qu.:2016-12-21 00:00:00.00 3rd Qu.:1.0000
## Max. :2021-04-29 00:00:00.00 Max. :1.0000
## NA's :2745
## tfs_time tfs5_status tfs5_time
## Min. : 0.00 Min. :0.0000 Min. : 0.0
## 1st Qu.: 2.00 1st Qu.:0.0000 1st Qu.: 2.0
## Median : 4.00 Median :1.0000 Median : 4.0
## Mean : 24.66 Mean :0.7327 Mean :16.4
## 3rd Qu.: 25.00 3rd Qu.:1.0000 3rd Qu.:25.0
## Max. :259.00 Max. :1.0000 Max. :60.0
##
table(atfs$grade_group)
##
## 1 2 3 4 5
## 3652 3313 2503 1195 1614
table(atfs$mri)
##
## 0 1
## 10492 1785
Import INLA and INLAjoint
library(INLA)
## Loading required package: Matrix
## Loading required package: sp
## This is INLA_24.02.09 built 2024-02-09 03:43:24 UTC.
## - See www.r-inla.org/contact-us for how to get help.
## - List available models/likelihoods/etc with inla.list.models()
## - Use inla.doc(<NAME>) to access documentation
#inla.update(testing=TRUE)
library(INLAjoint)
## Package 'INLAjoint' version 24.2.5
## Type 'citation("INLAjoint")' for citing this R package in publications.
# First I have to combine (back) the tfs and bcr data in one dataframe
surv_data <- merge(abcr[, c('id', 'drfs_status', 'drfs_time', 'after_tx')],
abx[, c('id', 'grade_group', 'mri','age_at_bx')], on='id', all=TRUE)
surv_data <- merge(surv_data,
atfs[, c('id', 'type_of_treatment', 'tfs_status', 'tfs_time')], on='id', all=TRUE)
# New ID
#surv_data$id <- 1:nrow(surv_data)
# Re-Order
#surv_data <- surv_data[, c(6, 5, 4, 3, 2)]
# Remove where both time to treatment/bcr = 0 and event = 0 (this should be update when converting time to days)
all_null_tfs <- surv_data[surv_data$tfs_time==0 & surv_data$tfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs$id), ]
all_null_drfs <- surv_data[surv_data$drfs_time==0 & surv_data$drfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_drfs$id), ]
# Now if no BCR data then rfs status = 0 and rfs time = tfs time [to fill out the NULL cells]
surv_data$drfs_status <- ifelse(is.na(surv_data$drfs_status), 0, surv_data$drfs_status)
surv_data$drfs_time <- ifelse(is.na(surv_data$drfs_time), surv_data$tfs_time, surv_data$drfs_time)
# Remove patients when time to tx/bcr = 0
all_null_tfs_drfs <- surv_data[surv_data$tfs_time==0 & surv_data$drfs_time==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs_drfs$id), ]
# if the tfs_status = 1 and after tx ????
M1 <- joint(formSurv =list(inla.surv(time = tfs_time, event = tfs_status) ~ -1),
basRisk = c("rw2"),
dataSurv = list(surv_data))
summary(M1)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 1.4005 0.4296 0.7507 1.335 2.4293
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -25069.39 -25069.39
##
## Deviance Information Criterion: 50050.01
## Widely applicable Bayesian information criterion: 50050.38
## Computation time: 1.32 seconds
plot(M1)
## $Baseline
##
## attr(,"class")
## [1] "plot.INLAjoint" "list"
M2 <- joint(formSurv = list(inla.surv(time = drfs_time, event = drfs_status) ~ -1),
basRisk = c("rw2"),
dataSurv = list(surv_data))
summary(M2)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.375 0.1596 0.1562 0.344 0.776
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -11180.02 -11180.02
##
## Deviance Information Criterion: 22294.39
## Widely applicable Bayesian information criterion: 22294.2
## Computation time: 1.32 seconds
plot(M2)
## $Baseline
##
## attr(,"class")
## [1] "plot.INLAjoint" "list"
plot(M2)$Baseline+scale_y_log10()
JM1 <- joint(formSurv=list(inla.surv(time = tfs_time, event = tfs_status) ~ -1+(1|id),
inla.surv(time = drfs_time, event = drfs_status) ~ -1), id="id",
basRisk=c("rw2", "rw2"), assocSurv=TRUE, NbasRisk = 30,
dataSurv = list(surv_data))
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Max id is 12541 but there are only 11577 individuals with longitudinal
## records, I'm reassigning ids
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : The hyperparameters skewness correction seems abnormal, this can be a sign
## of an ill-defined model and/or issues with the fit.
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Stupid local search strategy used: This can be a sign of a ill-defined
## model and/or non-informative data.
summary(JM1)
##
## Survival outcome (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 2.004 0.5085 1.1858 1.9437 3.1764
##
## Frailty term variance (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 0.9765 0.1155 0.7769 0.9666 1.2302
##
## Survival outcome (S2)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 1.3623 0.3328 0.8618 1.3083 2.162
##
## Association survival - survival
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 3.7982 0.2264 3.3352 3.804 4.2261
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -36738.35 -36738.35
##
## Deviance Information Criterion: 74905.69
## Widely applicable Bayesian information criterion: 31895535
## Computation time: 23.84 seconds
plot(JM1)$Baseline+scale_y_log10()
# Grade group as a factor
surv_data$grade_group <- as.factor(surv_data$grade_group)
# MRI as a factor
surv_data$mri <- as.factor(surv_data$mri)
# drfs status and tfs_statusas a factor
surv_data$drfs_status <- as.factor(surv_data$drfs_status)
surv_data$tfs_status <- as.factor(surv_data$tfs_status)
# Classify 'Age' to: a<=65 , 65<a<=70 , 70>a
# Create a new column class_age_at_bx as a factor with 3 levels
surv_data$class_age_at_bx <- cut(surv_data$age_at_bx,
breaks = c(-Inf, 60, 70, Inf),
labels = c("<60", "60-70", ">70"),
include.lowest = TRUE)
# Print a summary of the new attribute
table(surv_data$class_age_at_bx)
##
## <60 60-70 >70
## 2056 5096 4425
# Summary of the data
summary(surv_data)
## id drfs_status drfs_time after_tx grade_group
## Min. : 1 0:8893 Min. : 1.00 Length:11577 1:3513
## 1st Qu.: 3054 1:2684 1st Qu.: 3.00 Class :character 2:3182
## Median : 6124 Median : 14.00 Mode :character 3:2397
## Mean : 6121 Mean : 36.13 4:1116
## 3rd Qu.: 9178 3rd Qu.: 52.00 5:1369
## Max. :12541 Max. :259.00
## mri age_at_bx type_of_treatment tfs_status tfs_time
## 0:9858 Min. :39.00 Length:11577 0:2342 Min. : 0.00
## 1:1719 1st Qu.:63.00 Class :character 1:9235 1st Qu.: 2.00
## Median :68.00 Mode :character Median : 4.00
## Mean :67.61 Mean : 26.15
## 3rd Qu.:73.00 3rd Qu.: 28.00
## Max. :80.00 Max. :259.00
## class_age_at_bx
## <60 :2056
## 60-70:5096
## >70 :4425
##
##
##
JM2 <- joint(formSurv=list(inla.surv(time = tfs_time, event = tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx+(1|id),
inla.surv(time = drfs_time, event = drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx), id="id",
basRisk=c("rw2", "rw2"), assocSurv=TRUE, NbasRisk = 30,
dataSurv = list(surv_data))
## Warning in joint(formSurv = list(inla.surv(time = tfs_time, event = tfs_status)
## ~ : Max id is 12541 but there are only 11577 individuals with longitudinal
## records, I'm reassigning ids
summary(JM2)
##
## Survival outcome (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 0.3945 0.0364 0.3440 0.3876 0.4825
## grade_group2_S1 1.3541 0.0558 1.2446 1.3541 1.4636
## grade_group3_S1 1.8129 0.0610 1.6934 1.8129 1.9325
## grade_group4_S1 1.7034 0.0785 1.5494 1.7034 1.8574
## grade_group5_S1 2.3105 0.0741 2.1653 2.3104 2.4558
## mri1_S1 0.9038 0.0595 0.7871 0.9038 1.0206
## class_age_at_bx6070_S1 0.3481 0.0590 0.2324 0.3481 0.4637
## class_age_at_bx70_S1 0.5702 0.0609 0.4507 0.5702 0.6897
## after_txrt_S1 0.4825 0.0584 0.3680 0.4825 0.5971
##
## Frailty term variance (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 4.4795 0.0958 4.3034 4.4749 4.6791
##
## Survival outcome (S2)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 0.3576 0.0270 0.3143 0.3541 0.4192
## grade_group2_S2 1.5567 0.0766 1.4067 1.5567 1.7070
## grade_group3_S2 1.9750 0.0837 1.8111 1.9750 2.1392
## grade_group4_S2 1.9112 0.1076 1.7003 1.9112 2.1224
## grade_group5_S2 2.6799 0.1019 2.4803 2.6798 2.8799
## mri1_S2 1.4163 0.0819 1.2557 1.4162 1.5771
## class_age_at_bx6070_S2 0.4339 0.0807 0.2757 0.4339 0.5923
## class_age_at_bx70_S2 0.8903 0.0834 0.7267 0.8902 1.0540
## after_txrt_S2 -2.4713 0.0815 -2.6314 -2.4712 -2.3116
##
## Association survival - survival
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.3782 0.02 1.3359 1.3791 1.4144
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -81382.8 -81382.8
##
## Deviance Information Criterion: 141085
## Widely applicable Bayesian information criterion: 142542.4
## Computation time: 20.3 seconds
plot(JM2)$Baseline+scale_y_log10()
Note:
This plots are not so affirmative the way it is presented, we can transform them to the survival to make more sense.
The longer time to treatment the longer time to relapse, which is expected as this data include several effecting covariates (Confounder), such as age and Gleason grade group.
M1_surv <- joint(formSurv=list(inla.surv(time =tfs_time, event = tfs_status) ~ -1),
basRisk=c("rw2"), dataSurv = surv_data)
summary(M1_surv)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 1.1258 0.3376 0.6126 1.0751 1.9322
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -48079.76 -48079.76
##
## Deviance Information Criterion: 96063.18
## Widely applicable Bayesian information criterion: 96069.51
## Computation time: 1.25 seconds
#plot(M1_surv)
onePatient <- surv_data[1, ]
P <- predict(M1_surv, onePatient, id="id", horizon=300, surv=TRUE)$PredS
## Warning in predict.INLAjoint(M1_surv, onePatient, id = "id", horizon = 300, :
## The fitted model has baseline risk information up until value 259 for survival
## outcome 1. Since you ask for prediction at horizon 300 I will assume constant
## baseline hazard beyond the maximum available value. Alternatively, you can use
## baselineHaz='smooth' to use splines to predict the baseline hazard (for each
## sample). Alternatively, adding 'horizon' in the control options of the inla()
## call allows to extend the baseline beyond the last observed event time (linear
## extension, less flexible than the smooth method).
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 1 on PID: 31741
## Computing survival predictions for individual 1
# plot survival curve for the two time to event outcomes
plot(P$time, P$Surv_quant0.5, type="l", lwd=2, ylim=c(0, 1), xlab="time", ylab="survival probability")
lines(P$time, P$Surv_quant0.025, lty=2)
lines(P$time, P$Surv_quant0.975, lty=2)
# add observed event times
# sapply(surv_data[surv_data$tfs_status==1, "tfs_time"], function(x) abline(v=x, lty=3, lwd=0.5))
M2_surv <- joint(formSurv=list(inla.surv(time = drfs_time, event = drfs_status) ~ -1),
basRisk=c("rw2"), dataSurv = surv_data)
summary(M2_surv)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.4468 0.1685 0.2063 0.4168 0.8625
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -41586.44 -41586.44
##
## Deviance Information Criterion: 83092.81
## Widely applicable Bayesian information criterion: 83097.62
## Computation time: 1.3 seconds
#plot(M2_surv)
P2 <- predict(M2_surv, onePatient, id="id", horizon=300, surv=TRUE)$PredS
## Warning in predict.INLAjoint(M2_surv, onePatient, id = "id", horizon = 300, :
## The fitted model has baseline risk information up until value 259 for survival
## outcome 1. Since you ask for prediction at horizon 300 I will assume constant
## baseline hazard beyond the maximum available value. Alternatively, you can use
## baselineHaz='smooth' to use splines to predict the baseline hazard (for each
## sample). Alternatively, adding 'horizon' in the control options of the inla()
## call allows to extend the baseline beyond the last observed event time (linear
## extension, less flexible than the smooth method).
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 1 on PID: 31741
## Computing survival predictions for individual 1
lines(P2$time, P2$Surv_quant0.5, lwd=2, col=2)
lines(P2$time, P2$Surv_quant0.025, lty=2, col=2)
lines(P2$time, P2$Surv_quant0.975, lty=2, col=2)
# sapply(surv_data[surv_data$drfs_time==1, "drfs_time"], function(x) abline(v=x, lty=3, lwd=0.5, col=2))
legend("topright", c("Treatment", "Relapse"), lwd=2, col=1:2)
# Add Longitudinal data (PSA) to the surv_Data
surv_long_data <- merge(surv_data, apsa[, c('id', 'time_to_psa', 'psa_results','log_psa_results')], by='id')
# Remove NaNs from treatment related cols
# Replace NA values with 'unk'
surv_long_data$after_tx <- ifelse(is.na(surv_long_data$after_tx), 'unk', surv_long_data$after_tx)
surv_long_data$type_of_treatment <- ifelse(is.na(surv_long_data$type_of_treatment), 'unk', surv_long_data$type_of_treatment)
# Convert to factor
surv_long_data$after_tx <- factor(surv_long_data$after_tx)
surv_long_data$type_of_treatment <- factor(surv_long_data$type_of_treatment)
# Uncomment when needed, Sampling
unique_ids <- unique(surv_long_data$id)
sample_size <- round(0.01 * length(unique_ids))
# Sample X.% of the unique IDs
sampled_ids <- sample(unique_ids, sample_size, replace = FALSE)
# Subset the data based on sampled IDs
surv_long_data <- surv_long_data[surv_long_data$id %in% c(sampled_ids, 25), ]
# Reassigning ID
# -> Calculate run-length encoding of the original id column
id_rle <- rle(surv_long_data$id)
# -> Generate a sequence of IDs based on the lengths of runs in the original id column
new_id <- rep(seq_along(id_rle$lengths), id_rle$lengths)
# -> Reassign the new IDs to the id column in surv_long_data
surv_long_data$id <- new_id
# Surv data to be used in the latter join model
surv_nolong_data <- surv_long_data[!duplicated(surv_long_data$id), ]
summary(surv_long_data)
## id drfs_status drfs_time after_tx grade_group mri
## Min. : 1.00 0:1321 Min. : 1.00 rp : 150 1:604 0:1364
## 1st Qu.: 29.00 1: 398 1st Qu.: 4.00 rt : 248 2:541 1: 355
## Median : 58.00 Median : 18.00 unk:1321 3:314
## Mean : 57.39 Mean : 40.42 4:128
## 3rd Qu.: 86.00 3rd Qu.: 54.00 5:132
## Max. :117.00 Max. :231.00
## age_at_bx type_of_treatment tfs_status tfs_time class_age_at_bx
## Min. :49.00 chemo: 0 0: 373 Min. : 0.00 <60 :311
## 1st Qu.:62.00 horm :335 1:1346 1st Qu.: 3.00 60-70:873
## Median :67.00 radio:556 Median : 5.00 >70 :535
## Mean :66.91 rp :455 Mean : 33.24
## 3rd Qu.:72.00 unk :373 3rd Qu.: 40.00
## Max. :80.00 Max. :231.00
## time_to_psa psa_results log_psa_results
## Min. :-156.00 Min. : 0.000 Min. :0.00000
## 1st Qu.: 4.00 1st Qu.: 0.090 1st Qu.:0.08618
## Median : 24.00 Median : 1.160 Median :0.77011
## Mean : 31.07 Mean : 13.614 Mean :1.12239
## 3rd Qu.: 57.00 3rd Qu.: 5.955 3rd Qu.:1.93946
## Max. : 231.00 Max. :3260.000 Max. :8.08979
library(splines)
# We applied log transformation to PSA with shift=1
# Longitudinal Data should be sorted for each patient.
surv_long_data <- surv_long_data[with(surv_long_data, order(id, time_to_psa)), ]
# First joint model for longitudinal marker PSA
LM1 <- joint(formLong=log_psa_results ~ time_to_psa + (1 + time_to_psa | id),
timeVar="time_to_psa",
id="id",
family="gaussian",
control=list(int.strategy="eb"), # To make calculation faster, use only the mean for the hyper-parameter distributions
dataLong=surv_long_data
)
summary(LM1)
## Longitudinal outcome (gaussian)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 1.3393 0.0734 1.1954 1.3393 1.4832
## time_to_psa -0.0147 0.0089 -0.0322 -0.0147 0.0029
## Res. err. (variance) 0.4463 0.0162 0.4155 0.4460 0.4791
##
## Random effects variance-covariance (L1)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 0.5571 0.0821 0.4157 0.5502 0.7336
## time_to_psa 0.0092 0.0012 0.0071 0.0091 0.0118
## Intercept:time_to_psa -0.0013 0.0071 -0.0158 -0.0007 0.0119
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -2252.920 -2248.957
##
## Deviance Information Criterion: 3686.216
## Widely applicable Bayesian information criterion: 3743.282
## Computation time: 1.33 seconds
# 643:42 , 1030:41 , 321:37 , 553:37 , 938:36 , 444:36 , 405:33 -> these are some patients with large enough data
patientID = 25 # pick one
patient_counts <- table(surv_long_data$id)
max_count <- max(patient_counts)
patients_with_max_count <- names(patient_counts[patient_counts == max_count])
patientID = patients_with_max_count
ND <- surv_long_data[surv_long_data$id==patientID,]
P1 <- predict(LM1, ND, id="id", horizon=max(surv_long_data$time_to_psa))$PredL # Make Linear prediction
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
# To plot, should run from here
plot(P1$time_to_psa, P1$quant0.5, type="l", lwd=2, ylim=range(c(P1$quant0.025, P1$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P1$time_to_psa, P1$quant0.025, lty=2)
lines(P1$time_to_psa, P1$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19) # to here , pch=size of the dots
# use splines?
NSplines <- ns(surv_long_data$time_to_psa, knots=c(25)) # natural cubic splines , knots, the starting rise time, look at time to psa summary
f1 <- function(x) predict(NSplines, x)[,1] # first basis
f2 <- function(x) predict(NSplines, x)[,2] # second basis
# check splines
curve(f1, xlim=range(surv_long_data$time_to_psa), ylim=c(-1,1))
curve(f2, xlim=range(surv_long_data$time_to_psa), add=T)
# Second joint model for the longitudinal data, this time accounting for all
LM2 <- joint(formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
timeVar="time_to_psa",
id="id",
family="gaussian",
control=list(int.strategy="eb"),
dataLong=surv_long_data
)
summary(LM2)
## Longitudinal outcome (gaussian)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 1.2115 0.4835 0.2639 1.2115 2.1592
## f1time_to_psa -2.4374 0.7851 -3.9762 -2.4374 -0.8986
## f2time_to_psa -4.6243 1.8254 -8.2021 -4.6243 -1.0465
## grade_group2 0.7210 0.5707 -0.3976 0.7210 1.8396
## grade_group3 -0.9281 0.6808 -2.2624 -0.9281 0.4063
## grade_group4 1.4141 0.9685 -0.4842 1.4141 3.3124
## grade_group5 -0.4363 0.8244 -2.0521 -0.4363 1.1796
## mri1 -0.1095 0.1365 -0.3770 -0.1095 0.1580
## class_age_at_bx6070 0.2621 0.1779 -0.0866 0.2621 0.6107
## class_age_at_bx70 0.5424 0.1857 0.1784 0.5424 0.9064
## after_txrt 0.8043 0.2904 0.2351 0.8043 1.3734
## after_txunk 0.1688 0.2218 -0.2660 0.1688 0.6036
## f1time_to_psa:grade_group2 -3.7616 1.0947 -5.9071 -3.7616 -1.6161
## f1time_to_psa:grade_group3 -1.3502 1.2783 -3.8557 -1.3502 1.1553
## f1time_to_psa:grade_group4 -2.3859 1.8599 -6.0312 -2.3859 1.2594
## f1time_to_psa:grade_group5 1.2357 1.5673 -1.8362 1.2357 4.3076
## f2time_to_psa:grade_group2 -3.1997 2.7304 -8.5512 -3.1997 2.1518
## f2time_to_psa:grade_group3 -5.2452 3.2391 -11.5937 -5.2452 1.1033
## f2time_to_psa:grade_group4 2.2777 4.3371 -6.2229 2.2777 10.7783
## f2time_to_psa:grade_group5 1.7360 4.3266 -6.7440 1.7360 10.2159
## Res. err. (variance) 0.4028 0.0158 0.3722 0.4027 0.4341
##
## Random effects variance-covariance (L1)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 3.9518 3.5905 1.1649 2.9877 12.1458
## f1time_to_psa 13.2831 3.8321 7.6596 12.7082 22.4594
## f2time_to_psa 134.9731 28.0316 87.8352 132.3635 200.5550
## Intercept:f1time_to_psa 0.4751 3.2939 -7.6999 1.2702 3.7994
## Intercept:f2time_to_psa 18.1205 7.5477 8.1619 16.7820 36.4451
## f1time_to_psa:f2time_to_psa 26.7543 9.9779 5.2259 27.0630 45.1522
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -2108.808 -2102.088
##
## Deviance Information Criterion: 3487.443
## Widely applicable Bayesian information criterion: 3555.614
## Computation time: 2.16 seconds
# Again, pick one patient (here the same)
ND2 <- surv_long_data[surv_long_data$id==patientID,] # observed vs. fitted for a couple individuals.
P2 <- predict(LM2, ND, id="id", horizon=max(surv_long_data$time_to_psa))$PredL
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
plot(P2$time_to_psa, P2$quant0.5, type="l", lwd=2, ylim=range(c(P2$quant0.025, P2$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P2$time_to_psa, P2$quant0.025, lty=2)
lines(P2$time_to_psa, P2$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19)
#
inla.setOption(num.threads='8:1')
# Third joint model, combinning 2 survival modals and the longitudinal one
LJM1 <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx + (1|id),
inla.surv(drfs_time, drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx),
formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
id="id",
timeVar="time_to_psa",
basRisk=c("rw2", "rw2"), # random walk
assocSurv=TRUE,
family="gaussian",
assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
control=list(int.strategy="eb"),
dataSurv=list(surv_nolong_data),
dataLong=surv_long_data
)
summary(LJM1)
## Longitudinal outcome (gaussian)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept_L1 1.2444 0.4767 0.3102 1.2444 2.1787
## f1time_to_psa_L1 -2.4534 0.7825 -3.9871 -2.4534 -0.9197
## f2time_to_psa_L1 -4.5632 1.7988 -8.0887 -4.5632 -1.0376
## grade_group2_L1 0.6752 0.5605 -0.4233 0.6752 1.7738
## grade_group3_L1 -0.9331 0.6703 -2.2469 -0.9331 0.3808
## grade_group4_L1 1.3646 0.9564 -0.5098 1.3646 3.2390
## grade_group5_L1 -0.4683 0.8111 -2.0581 -0.4683 1.1214
## mri1_L1 -0.1331 0.1349 -0.3975 -0.1331 0.1312
## class_age_at_bx6070_L1 0.2623 0.1773 -0.0852 0.2623 0.6098
## class_age_at_bx70_L1 0.5376 0.1848 0.1755 0.5376 0.8997
## after_txrt_L1 0.8004 0.2872 0.2375 0.8004 1.3633
## after_txunk_L1 0.1612 0.2194 -0.2687 0.1612 0.5912
## f1time_to_psa:grade_group2_L1 -3.6727 1.0906 -5.8103 -3.6727 -1.5352
## f1time_to_psa:grade_group3_L1 -1.3250 1.2754 -3.8248 -1.3250 1.1748
## f1time_to_psa:grade_group4_L1 -2.2551 1.8558 -5.8924 -2.2551 1.3822
## f1time_to_psa:grade_group5_L1 1.2983 1.5668 -1.7725 1.2983 4.3691
## f2time_to_psa:grade_group2_L1 -3.2060 2.6914 -8.4810 -3.2060 2.0691
## f2time_to_psa:grade_group3_L1 -5.2278 3.1932 -11.4865 -5.2278 1.0308
## f2time_to_psa:grade_group4_L1 2.3189 4.2829 -6.0754 2.3189 10.7131
## f2time_to_psa:grade_group5_L1 1.6850 4.2736 -6.6911 1.6850 10.0610
## Res. err. (variance) 0.4032 0.0157 0.3735 0.4028 0.4352
##
## Random effects variance-covariance (L1)
## mean sd 0.025quant 0.5quant
## Intercept_L1 3.4037 2.4991 1.1437 2.8060
## f1time_to_psa_L1 13.0074 3.6435 7.5592 12.5538
## f2time_to_psa_L1 127.2802 23.6713 85.3376 125.8892
## Intercept_L1:f1time_to_psa_L1 0.5998 2.4390 -4.7301 1.1661
## Intercept_L1:f2time_to_psa_L1 16.6927 6.0326 7.9048 15.8057
## f1time_to_psa_L1:f2time_to_psa_L1 25.8435 8.4248 8.7672 25.9323
## 0.975quant
## Intercept_L1 8.9143
## f1time_to_psa_L1 21.6029
## f2time_to_psa_L1 177.4183
## Intercept_L1:f1time_to_psa_L1 3.0623
## Intercept_L1:f2time_to_psa_L1 31.2052
## f1time_to_psa_L1:f2time_to_psa_L1 42.6080
##
## Survival outcome (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 0.0556 0.0245 0.0229 0.0505 0.1180
## grade_group2_S1 3.1912 0.8173 1.5894 3.1912 4.7931
## grade_group3_S1 2.9709 0.9976 1.0157 2.9709 4.9261
## grade_group4_S1 2.0540 1.4025 -0.6948 2.0540 4.8029
## grade_group5_S1 3.4948 1.3242 0.8994 3.4948 6.0902
## mri1_S1 1.5195 0.8084 -0.0649 1.5195 3.1039
## class_age_at_bx6070_S1 0.2104 0.9087 -1.5706 0.2104 1.9914
## class_age_at_bx70_S1 0.6940 0.9793 -1.2253 0.6940 2.6134
## after_txrt_S1 -1.1795 1.5389 -4.1957 -1.1795 1.8367
## after_txunk_S1 -1.3235 1.1039 -3.4870 -1.3235 0.8400
##
## Frailty term variance (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 11.6985 2.0526 7.9699 11.6206 15.993
##
## Survival outcome (S2)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 0.0521 0.0218 0.0198 0.0489 0.1043
## grade_group2_S2 3.2262 0.8534 1.5535 3.2262 4.8989
## grade_group3_S2 3.1058 1.0443 1.0590 3.1058 5.1526
## grade_group4_S2 1.9393 1.4725 -0.9467 1.9393 4.8253
## grade_group5_S2 3.5001 1.3827 0.7900 3.5001 6.2101
## mri1_S2 1.9643 0.8487 0.3008 1.9643 3.6278
## class_age_at_bx6070_S2 0.1497 0.9523 -1.7168 0.1497 2.0163
## class_age_at_bx70_S2 0.5012 1.0269 -1.5114 0.5012 2.5139
## after_txrt_S2 -0.9977 1.5937 -4.1214 -0.9977 2.1260
## after_txunk_S2 1.0270 1.1496 -1.2262 1.0270 3.2802
##
## Association longitudinal - survival
## mean sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.0365 0.1443 -0.3259 -0.0347 0.2422
## CV_L1_S2 0.1891 0.1719 -0.1486 0.1888 0.5282
##
## Association survival - survival
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.043 0.0926 0.864 1.0419 1.2286
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -5532.715 -5520.481
##
## Deviance Information Criterion: -894.2573
## Widely applicable Bayesian information criterion: -1032.216
## Computation time: 6.85 seconds
surv_long_ppsa_data <- surv_long_data[surv_long_data$time_to_psa >= 0, ]
# First joint model for longitudinal marker PSA
LM1_v2 <- joint(formLong=log_psa_results ~ time_to_psa + (1 + time_to_psa | id),
timeVar="time_to_psa",
id="id",
family="gaussian",
control=list(int.strategy="eb"), # To make calculation faster, use only the mean for the hyper-parameter distributions
dataLong=surv_long_ppsa_data
)
summary(LM1_v2)
## Longitudinal outcome (gaussian)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 1.1826 0.0920 1.0023 1.1826 1.3628
## time_to_psa -0.0156 0.0094 -0.0340 -0.0156 0.0027
## Res. err. (variance) 0.3320 0.0137 0.3060 0.3317 0.3599
##
## Random effects variance-covariance (L1)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 0.8729 0.1304 0.6507 0.8636 1.1581
## time_to_psa 0.0098 0.0013 0.0075 0.0097 0.0124
## Intercept:time_to_psa -0.0121 0.0099 -0.0326 -0.0118 0.0067
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -1692.643 -1688.679
##
## Deviance Information Criterion: 2434.868
## Widely applicable Bayesian information criterion: 2655.689
## Computation time: 1.38 seconds
# 643:42 , 1030:41 , 321:37 , 553:37 , 938:36 , 444:36 , 405:33 -> these are some patients with large enough data
patientID = 25 # pick one
max_psa_patient <- surv_long_ppsa_data$id[which.max(surv_long_ppsa_data$PSA_results)]
patient_counts <- table(surv_long_ppsa_data$id)
max_count <- max(patient_counts)
patients_with_max_count <- names(patient_counts[patient_counts == max_count])
patientID = patients_with_max_count
ND <- surv_long_ppsa_data[surv_long_ppsa_data$id==patientID,]
P1 <- predict(LM1_v2, ND, id="id", horizon=max(surv_long_ppsa_data$time_to_psa))$PredL # Make Linear prediction
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
# To plot, should run from here
plot(P1$time_to_psa, P1$quant0.5, type="l", lwd=2, ylim=range(c(P1$quant0.025, P1$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P1$time_to_psa, P1$quant0.025, lty=2)
lines(P1$time_to_psa, P1$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19) # to here , pch=size of the dots
# use splines?
NSplines <- ns(surv_long_ppsa_data$time_to_psa, knots=c(25)) # natural cubic splines , knots, the starting rise time, look at time to psa summary
f1 <- function(x) predict(NSplines, x)[,1] # first basis
f2 <- function(x) predict(NSplines, x)[,2] # second basis
# check splines
curve(f1, xlim=range(surv_long_ppsa_data$time_to_psa), ylim=c(-1,1))
curve(f2, xlim=range(surv_long_ppsa_data$time_to_psa), add=T)
# Second joint model for the longitudinal data, this time accounting for all
LM2_v2 <- joint(formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
timeVar="time_to_psa",
id="id",
family="gaussian",
control=list(int.strategy="eb"),
dataLong=surv_long_ppsa_data
)
summary(LM2_v2)
## Longitudinal outcome (gaussian)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 0.9918 0.3354 0.3343 0.9918 1.6492
## f1time_to_psa -1.7051 0.5043 -2.6936 -1.7051 -0.7167
## f2time_to_psa 0.5531 0.3341 -0.1017 0.5531 1.2078
## grade_group2 -0.6071 0.2219 -1.0421 -0.6071 -0.1722
## grade_group3 -0.6836 0.2681 -1.2090 -0.6836 -0.1582
## grade_group4 -0.9504 0.3801 -1.6954 -0.9504 -0.2055
## grade_group5 -0.8410 0.3571 -1.5409 -0.8410 -0.1410
## mri1 0.0110 0.1833 -0.3482 0.0110 0.3702
## class_age_at_bx6070 0.1511 0.2106 -0.2617 0.1511 0.5639
## class_age_at_bx70 0.6134 0.2266 0.1694 0.6134 1.0574
## after_txrt 1.3934 0.3777 0.6532 1.3934 2.1336
## after_txunk 0.4360 0.2934 -0.1391 0.4360 1.0111
## f1time_to_psa:grade_group2 0.4454 0.7389 -1.0028 0.4454 1.8935
## f1time_to_psa:grade_group3 0.4420 0.8772 -1.2772 0.4420 2.1612
## f1time_to_psa:grade_group4 1.4517 1.2481 -0.9946 1.4517 3.8979
## f1time_to_psa:grade_group5 3.1269 1.4766 0.2329 3.1269 6.0209
## f2time_to_psa:grade_group2 2.3665 0.5431 1.3021 2.3665 3.4309
## f2time_to_psa:grade_group3 1.9749 0.5474 0.9019 1.9749 3.0478
## f2time_to_psa:grade_group4 -0.8102 0.6849 -2.1527 -0.8102 0.5322
## f2time_to_psa:grade_group5 0.7821 2.2827 -3.6918 0.7821 5.2561
## Res. err. (variance) 0.3161 0.0133 0.2909 0.3158 0.3432
##
## Random effects variance-covariance (L1)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept 0.9433 0.4533 0.5340 0.8485 1.8545
## f1time_to_psa 8.0354 3.7853 3.3455 7.4476 16.9789
## f2time_to_psa 1.1070 0.9675 0.1861 0.8312 3.6266
## Intercept:f1time_to_psa -1.0611 1.0419 -3.2209 -0.9695 0.5956
## Intercept:f2time_to_psa 0.5666 0.5237 -0.0651 0.4643 1.6237
## f1time_to_psa:f2time_to_psa -2.0608 1.3205 -5.1128 -1.8495 -0.0936
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -1554.261 -1547.541
##
## Deviance Information Criterion: 2392.827
## Widely applicable Bayesian information criterion: 2566.361
## Computation time: 2.11 seconds
# Again, pick one patient (here the same)
ND2 <- surv_long_ppsa_data[surv_long_ppsa_data$id==patientID,] # observed vs. fitted for a couple individuals.
P2 <- predict(LM2_v2, ND, id="id", horizon=max(surv_long_ppsa_data$time_to_psa))$PredL
## Start sampling
## Sampling done.
## Computing longitudinal predictions for individual 74 on PID: 31741
plot(P2$time_to_psa, P2$quant0.5, type="l", lwd=2, ylim=range(c(P2$quant0.025, P2$quant0.975)), xlab="time", ylab="log (PSA+1)")
lines(P2$time_to_psa, P2$quant0.025, lty=2)
lines(P2$time_to_psa, P2$quant0.975, lty=2)
points(ND$time_to_psa, ND$log_psa_results, pch=19)
# Third joint model, combinning 2 survival modals and the longitudinal one
LJM1_v2 <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ grade_group+mri+class_age_at_bx+after_tx + (1|id),
inla.surv(drfs_time, drfs_status) ~ grade_group+mri+class_age_at_bx+after_tx),
formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * grade_group+mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
id="id",
timeVar="time_to_psa",
basRisk=c("rw2", "rw2"), # random walk
assocSurv=TRUE,
family="gaussian",
assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
control=list(int.strategy="eb"),
dataSurv=list(surv_nolong_data[which(surv_nolong_data$id %in% surv_long_ppsa_data$id),]),
dataLong=surv_long_ppsa_data
)
## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ grade_group
## + : The hyperparameters skewness correction seems abnormal, this can be a sign
## of an ill-defined model and/or issues with the fit.
## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ grade_group
## + : Internal correlation between hyperparameters is abnormally high, this is a
## sign of identifiability issues / ill-defined model.
summary(LJM1_v2)
## Longitudinal outcome (gaussian)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept_L1 0.9918 0.3350 0.3353 0.9918 1.6483
## f1time_to_psa_L1 -1.7289 0.5075 -2.7235 -1.7289 -0.7343
## f2time_to_psa_L1 0.5008 0.3293 -0.1446 0.5008 1.1461
## grade_group2_L1 -0.6026 0.2214 -1.0365 -0.6026 -0.1687
## grade_group3_L1 -0.6836 0.2674 -1.2077 -0.6836 -0.1594
## grade_group4_L1 -0.9430 0.3792 -1.6862 -0.9430 -0.1999
## grade_group5_L1 -0.8385 0.3564 -1.5370 -0.8385 -0.1401
## mri1_L1 0.0130 0.1830 -0.3456 0.0130 0.3717
## class_age_at_bx6070_L1 0.1520 0.2104 -0.2604 0.1520 0.5644
## class_age_at_bx70_L1 0.6159 0.2263 0.1724 0.6159 1.0595
## after_txrt_L1 1.3923 0.3774 0.6527 1.3923 2.1319
## after_txunk_L1 0.4313 0.2930 -0.1431 0.4313 1.0056
## f1time_to_psa:grade_group2_L1 0.4429 0.7436 -1.0144 0.4429 1.9003
## f1time_to_psa:grade_group3_L1 0.4309 0.8828 -1.2994 0.4309 2.1612
## f1time_to_psa:grade_group4_L1 1.4457 1.2564 -1.0169 1.4457 3.9083
## f1time_to_psa:grade_group5_L1 3.1416 1.4851 0.2307 3.1416 6.0524
## f2time_to_psa:grade_group2_L1 2.4056 0.5360 1.3551 2.4056 3.4561
## f2time_to_psa:grade_group3_L1 1.9442 0.5374 0.8908 1.9442 2.9975
## f2time_to_psa:grade_group4_L1 -0.7062 0.6706 -2.0206 -0.7062 0.6081
## f2time_to_psa:grade_group5_L1 0.8545 2.2890 -3.6318 0.8545 5.3407
## Res. err. (variance) 0.3161 0.0134 0.2909 0.3157 0.3437
##
## Random effects variance-covariance (L1)
## mean sd 0.025quant 0.5quant
## Intercept_L1 21877.233 368288.5 0.6297 9.0712
## f1time_to_psa_L1 43925604.510 1520625231.5 0.0427 8.3465
## f2time_to_psa_L1 11684.163 435821.5 0.0001 0.5875
## Intercept_L1:f1time_to_psa_L1 468908.831 14173708.5 -3121.9990 -1.2575
## Intercept_L1:f2time_to_psa_L1 5490.018 115292.2 -46.2756 0.3083
## f1time_to_psa_L1:f2time_to_psa_L1 104633.027 2110109.5 -42.7303 -0.0440
## 0.975quant
## Intercept_L1 44647.276
## f1time_to_psa_L1 3049344.962
## f2time_to_psa_L1 5644.743
## Intercept_L1:f1time_to_psa_L1 185387.372
## Intercept_L1:f2time_to_psa_L1 9853.974
## f1time_to_psa_L1:f2time_to_psa_L1 81180.934
##
## Survival outcome (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 14.8147 178.1015 0.0000 0.0087 55.7707
## grade_group2_S1 2.9313 0.7781 1.4063 2.9313 4.4564
## grade_group3_S1 2.7160 0.9494 0.8552 2.7160 4.5768
## grade_group4_S1 1.8039 1.3340 -0.8107 1.8039 4.4186
## grade_group5_S1 3.2865 1.2613 0.8144 3.2865 5.7587
## mri1_S1 1.4565 0.7689 -0.0506 1.4565 2.9635
## class_age_at_bx6070_S1 0.2248 0.8650 -1.4705 0.2248 1.9200
## class_age_at_bx70_S1 0.7894 0.9320 -1.0373 0.7894 2.6160
## after_txrt_S1 -0.7927 1.4678 -3.6695 -0.7927 2.0841
## after_txunk_S1 -1.1563 1.0562 -3.2263 -1.1563 0.9138
##
## Frailty term variance (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 11.817 6.9784 3.1772 10.2328 29.9483
##
## Survival outcome (S2)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 0.3268 0.8045 0.0049 0.0955 2.2095
## grade_group2_S2 3.1092 0.8563 1.4309 3.1092 4.7875
## grade_group3_S2 3.0022 1.0478 0.9487 3.0022 5.0558
## grade_group4_S2 1.8863 1.4775 -1.0095 1.8863 4.7821
## grade_group5_S2 3.5006 1.3886 0.7790 3.5006 6.2222
## mri1_S2 1.9778 0.8510 0.3098 1.9778 3.6458
## class_age_at_bx6070_S2 0.2044 0.9557 -1.6688 0.2044 2.0777
## class_age_at_bx70_S2 0.6324 1.0307 -1.3877 0.6324 2.6526
## after_txrt_S2 -0.5736 1.6023 -3.7140 -0.5736 2.5667
## after_txunk_S2 1.2060 1.1589 -1.0654 1.2060 3.4774
##
## Association longitudinal - survival
## mean sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.1200 1.8057 -3.5926 -0.1475 3.5162
## CV_L1_S2 0.0141 1.4056 -2.6902 -0.0070 2.8434
##
## Association survival - survival
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.1452 0.4414 0.3014 1.1368 2.039
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -4971.973 -4959.739
##
## Deviance Information Criterion: -1988.515
## Widely applicable Bayesian information criterion: -2024.257
## Computation time: 6.67 seconds
ALL T2PSA By GG
gg <- 2
id_rle <- rle(surv_long_data$id)
new_id <- rep(seq_along(id_rle$lengths), id_rle$lengths)
# -> Reassign the new IDs to the id column in surv_long_data
longdata <- surv_long_data
longdata$id <- new_id
# Surv data to be used in the latter join model
surdata <- longdata[!duplicated(longdata$id), ]
# Third joint model, combinning 2 survival modals and the longitudinal one
LJM1_tmp <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ mri+class_age_at_bx+after_tx + (1|id),
inla.surv(drfs_time, drfs_status) ~ mri+class_age_at_bx+after_tx),
formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id),
id="id",
timeVar="time_to_psa",
basRisk=c("rw2", "rw2"), # random walk
assocSurv=TRUE,
family="gaussian",
assoc=c("CV", "CV"), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t
control=list(int.strategy="eb"),
dataSurv=list(surdata[(surdata$id %in% longdata$id) & (surdata$grade_group == gg),]),
dataLong=longdata[longdata$grade_group == gg,]
)
## Warning in joint(formSurv = list(inla.surv(tfs_time, tfs_status) ~ mri + : Max
## id is 117 but there are only 39 individuals with longitudinal records, I'm
## reassigning id from 1 to 39
cat("Current GG is ", gg)
## Current GG is 2
summary(LJM1_tmp)
## Longitudinal outcome (gaussian)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept_L1 0.7770 0.2469 0.2931 0.7770 1.2609
## f1time_to_psa_L1 -0.7843 0.2614 -1.2966 -0.7843 -0.2720
## f2time_to_psa_L1 2.1851 0.4053 1.3906 2.1851 2.9795
## mri1_L1 0.2193 0.1880 -0.1492 0.2193 0.5878
## class_age_at_bx6070_L1 -0.1058 0.2059 -0.5093 -0.1058 0.2978
## class_age_at_bx70_L1 0.2007 0.2377 -0.2651 0.2007 0.6666
## after_txrt_L1 0.9697 0.4221 0.1423 0.9697 1.7971
## after_txunk_L1 0.2544 0.2542 -0.2437 0.2544 0.7526
## f1time_to_psa:mri1_L1 -4.6636 4.0228 -12.5480 -4.6636 3.2209
## f2time_to_psa:mri1_L1 -10.8246 7.7175 -25.9506 -10.8246 4.3014
## Res. err. (variance) 0.4590 0.0314 0.4005 0.4579 0.5239
##
## Random effects variance-covariance (L1)
## mean sd 0.025quant 0.5quant 0.975quant
## Intercept_L1 0.2262 0.1253 0.1040 0.1986 0.5259
## f1time_to_psa_L1 0.9705 1.4628 0.1714 0.6297 4.1348
## f2time_to_psa_L1 0.1859 0.1518 0.0338 0.1424 0.5886
## Intercept_L1:f1time_to_psa_L1 -0.0120 0.3152 -0.5829 0.0194 0.3672
## Intercept_L1:f2time_to_psa_L1 0.0155 0.1170 -0.2132 0.0081 0.2693
## f1time_to_psa_L1:f2time_to_psa_L1 0.1382 0.3983 -0.2008 0.0224 1.1353
##
## Survival outcome (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 0.0284 0.0554 0.0007 0.0110 0.1709
## mri1_S1 0.4376 0.8389 -1.2066 0.4376 2.0817
## class_age_at_bx6070_S1 -0.3112 0.8938 -2.0630 -0.3112 1.4407
## class_age_at_bx70_S1 -0.7998 1.0489 -2.8557 -0.7998 1.2561
## after_txrt_S1 -1.9794 1.8251 -5.5565 -1.9794 1.5977
## after_txunk_S1 -0.2459 1.0790 -2.3606 -0.2459 1.8688
##
## Frailty term variance (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 4.3621 1.9741 1.661 3.9819 9.316
##
## Survival outcome (S2)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 0.0504 0.0811 0.0023 0.0242 0.2683
## mri1_S2 0.4830 0.9986 -1.4742 0.4830 2.4403
## class_age_at_bx6070_S2 -0.4422 1.0539 -2.5078 -0.4422 1.6233
## class_age_at_bx70_S2 -1.0144 1.2368 -3.4384 -1.0144 1.4097
## after_txrt_S2 -2.4696 2.1249 -6.6343 -2.4696 1.6950
## after_txunk_S2 2.0236 1.2601 -0.4462 2.0236 4.4934
##
## Association longitudinal - survival
## mean sd 0.025quant 0.5quant 0.975quant
## CV_L1_S1 -0.6431 0.8051 -2.2047 -0.6511 0.9655
## CV_L1_S2 -0.1483 0.8807 -1.8652 -0.1541 1.6025
##
## Association survival - survival
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.1861 0.1991 0.8014 1.1836 1.5852
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -1700.249 -1688.015
##
## Deviance Information Criterion: -30.77604
## Widely applicable Bayesian information criterion: -88.97328
## Computation time: 2.96 seconds
ONLY +T2PSA By GG for (gg in c(‘1’,‘2’,‘3’,‘4’,‘5’)) {
Third joint model, combinning 2 survival modals and the longitudinal one LJM2_tmp <- joint(formSurv=list(inla.surv(tfs_time, tfs_status) ~ mri+class_age_at_bx+after_tx + (1|id), inla.surv(drfs_time, drfs_status) ~ mri+class_age_at_bx+after_tx), formLong=log_psa_results ~ (f1(time_to_psa) + f2(time_to_psa)) * mri+class_age_at_bx+after_tx + (1 + f1(time_to_psa) + f2(time_to_psa)| id), id=“id”, timeVar=“time_to_psa”, basRisk=c(“rw2”, “rw2”), # random walk assocSurv=TRUE, family=“gaussian”, assoc=c(“CV”, “CV”), # what is cv : Current value = risk event at t depends on longit.marker(psa) at t control=list(int.strategy=“eb”), dataSurv=list(surv_nolong_data[(surv_nolong_data\(id %in% surv_long_ppsa_data\)id) & (surv_nolong_data\(grade_group == gg),]), dataLong=surv_long_ppsa_data[surv_long_ppsa_data\)grade_group == gg,] ) cat(“Current GG is”, gg)
summary(LJM2_tmp)
}