# install.packages("arrow")
# install.packages("ggplot2")
# install.packages("dplyr")
# install.packages("frailtypack")
##
## Attaching package: 'arrow'
## The following object is masked from 'package:utils':
##
## timestamp
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bcr_data <- read_parquet("kdata/bcr.parquet")
tfs_data <- read_parquet("kdata/tfs.parquet")
# Patients with BCR before treatment (!!!)
bcr_before_tx <- subset(bcr_data, relapse_date < treatment_date)
bcr_data_v2 <- subset(bcr_data, !(pseudoid %in% bcr_before_tx$pseudoid))
# Create a scatter plot with regression line
ggplot(bcr_data_v2, aes(x = months_sampling_to_treatment, y = months_treatment_to_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "[A~B] All BCR patients",
x = "Months Sampling to Treatment",
y = "Months Treatment to Relapse")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 6043 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6043 rows containing missing values (`geom_point()`).
ggplot(bcr_data_v2, aes(x = months_sampling_to_treatment, y = months_treatment_to_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(~ grade_group) +
labs(title = "[A~B] BCR patients by Gleason Grade Group",
x = "Months Sampling to Treatment",
y = "Months Treatment to Relapse")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 6043 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6043 rows containing missing values (`geom_point()`).
ggplot(bcr_data_v2, aes(x = months_sampling_to_treatment, y = months_sampling_to_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "[A~C] All BCR patients",
x = "Months Sampling to Treatment",
y = "Months Sampling to Relapse")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 6043 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6043 rows containing missing values (`geom_point()`).
ggplot(bcr_data_v2, aes(x = months_sampling_to_treatment, y = months_sampling_to_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
facet_grid(~ grade_group) +
labs(title = "[A~C] BCR patients by Gleason grade group",
x = "Months Sampling to Treatment",
y = "Months Sampling to Relapse")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 6043 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 6043 rows containing missing values (`geom_point()`).
# Group by A and calculate mean of B
grouped_data <- bcr_data_v2 %>%
group_by(months_sampling_to_treatment) %>%
summarize(mean_months_relapse = mean(months_treatment_to_relapse, na.rm = TRUE))
# Create a scatter plot with regression line and facet by 'grade_group'
ggplot(grouped_data, aes(x = months_sampling_to_treatment, y = mean_months_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "[A~B.mean] Mean Months Treatment to relapse by Months Sampling to Treatment",
x = "Months Sampling to Treatment",
y = "Mean Months Treatment to Relapse")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 37 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values (`geom_point()`).
# Group by A and calculate mean of B
grouped_data <- bcr_data_v2 %>%
group_by(months_sampling_to_treatment, grade_group) %>%
summarize(mean_months_relapse = mean(months_treatment_to_relapse, na.rm = TRUE))
## `summarise()` has grouped output by 'months_sampling_to_treatment'. You can
## override using the `.groups` argument.
# Create a scatter plot with regression line and facet by 'grade_group'
ggplot(grouped_data, aes(x = months_sampling_to_treatment, y = mean_months_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "[A~B.mean] Mean Months Treatment to relapse",
x = "Months Sampling to Treatment",
y = "Mean Months Treatment to Relapse")+
facet_grid(~ grade_group)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 173 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 173 rows containing missing values (`geom_point()`).
# Group by [A] 'months_sampling_to_treatment' and calculate mean
grouped_data <- bcr_data_v2 %>%
group_by(months_sampling_to_treatment) %>%
summarize(mean_months_relapse = mean(months_sampling_to_relapse, na.rm = TRUE))
# Create a scatter plot with regression line
ggplot(grouped_data, aes(x = months_sampling_to_treatment, y = mean_months_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "[A~C.mean] Mean Months Sampling to Relapse by Months Sampling to Relapse",
x = "Months Sampling to Treatment",
y = "Mean Months Sampling to Relapse")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 37 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 37 rows containing missing values (`geom_point()`).
# Group by [A] 'months_sampling_to_treatment' and calculate mean
grouped_data <- bcr_data_v2 %>%
group_by(months_sampling_to_treatment, grade_group) %>%
summarize(mean_months_relapse = mean(months_sampling_to_relapse, na.rm = TRUE))
## `summarise()` has grouped output by 'months_sampling_to_treatment'. You can
## override using the `.groups` argument.
# Create a scatter plot with regression line and facet by 'grade_group'
ggplot(grouped_data, aes(x = months_sampling_to_treatment, y = mean_months_relapse)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(title = "[A~C.mean] Mean Months Sampling to Relapse by Months Sampling to Relapse",
x = "Months Sampling to Treatment",
y = "Mean Months Sampling to Relapse")+
facet_grid(~ grade_group)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 173 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 173 rows containing missing values (`geom_point()`).
library(INLA)
## Loading required package: Matrix
## Loading required package: sp
## This is INLA_24.01.29 built 2024-01-28 20:44:17 UTC.
## - See www.r-inla.org/contact-us for how to get help.
## - List available models/likelihoods/etc with inla.list.models()
## - Use inla.doc(<NAME>) to access documentation
library(INLAjoint)
## Package 'INLAjoint' version 24.2.5
## Type 'citation("INLAjoint")' for citing this R package in publications.
# time to treatment , is treated, time to recurrence, is relapsed
# 2 surv modals
# v3: only patients with Tx , we look at the risk over time
tfs_data_v3 <- tfs_data[ tfs_data$tfs_time >0,]
M1 <- joint(formSurv = list(inla.surv(time=tfs_time, event=tfs_status) ~ -1),
basRisk = c("weibullsurv"),
dataSurv = list(tfs_data_v3))
summary(M1)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Weibull (shape) 0.5518 0.0045 0.5431 0.5518 0.5607
## Weibull (scale) 0.1682 0.0026 0.1633 0.1681 0.1732
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -37835.3 -37835.3
##
## Deviance Information Criterion: 75651.92
## Widely applicable Bayesian information criterion: 75652.02
## Computation time: 4.66 seconds
Note: mean is ….
M1rw2 <- joint(formSurv = list(inla.surv(time=tfs_time, event=tfs_status) ~ -1),
basRisk = c("rw2"),
dataSurv = list(tfs_data_v3))
plot(M1rw2)
## $Baseline
##
## attr(,"class")
## [1] "plot.INLAjoint" "list"
Note: this cureve shows …..
# v3: only patients with BCR , we look at the risk over time
# !!!!! WRONG, this should include both rfs 1 and 0 ????
bcr_data_v3 <- bcr_data_v2[which(!is.na(bcr_data_v2$months_sampling_to_relapse)),]
M2 <- joint(formSurv=list(inla.surv(time= drfs_time, event=drfs_status) ~ -1),
basRisk=c("weibullsurv"),
dataSurv = list(bcr_data_v3))
summary(M2)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Weibull (shape) 1.4135 0.0210 1.3726 1.4133 1.4555
## Weibull (scale) 0.0031 0.0002 0.0027 0.0031 0.0034
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -13199.48 -13199.48
##
## Deviance Information Criterion: 26387.24
## Widely applicable Bayesian information criterion: 26387.19
## Computation time: 0.99 seconds
Note: the resutls are ….
M2rw2 <- joint(formSurv = list(inla.surv(time=drfs_time, event=drfs_status) ~ -1),
basRisk = c("rw2"),
dataSurv = list(bcr_data_v3))
plot(M2rw2)
## $Baseline
##
## attr(,"class")
## [1] "plot.INLAjoint" "list"
Note: This curve shows ….
# 12 recurrent-terminal
library(frailtypack)
## Loading required package: survival
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:survival':
##
## aml
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: survC1
## Loading required package: doBy
##
## Attaching package: 'doBy'
## The following object is masked from 'package:dplyr':
##
## order_by
##
## Attaching package: 'frailtypack'
## The following object is masked from 'package:survival':
##
## cluster
data(readmission)
terminalData <- readmission[readmission$event==0,]
#data$id <- 1:nrow(data)
M1 <- joint(formSurv=list(inla.surv(time = t.stop, event = death) ~ -1),
basRisk=c("rw2"), dataSurv = list(terminalData))
summary(M1)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.0162 0.037 1e-04 0.0044 0.11
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -581.1012 -581.1012
##
## Deviance Information Criterion: 1118.513
## Widely applicable Bayesian information criterion: 1118.468
## Computation time: 0.97 seconds
plot(M1)
## $Baseline
##
## attr(,"class")
## [1] "plot.INLAjoint" "list"
M2 <- joint(formSurv=list(inla.surv(time = t.stop, event = event) ~ -1),
basRisk=c("rw2"), dataSurv = list(terminalData))
summary(M2)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.0283 0.0754 1e-04 0.0059 0.2062
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -27.04279 -27.04279
##
## Deviance Information Criterion: 5.486813
## Widely applicable Bayesian information criterion: 3.93188
## Computation time: 0.97 seconds
plot(M2)$Baseline+scale_y_log10()
M12 <- joint(formSurv=list(inla.surv(time = t.stop, event = event) ~ -1+(1|id),
inla.surv(time = t.stop, event = death) ~ -1), id="id",
basRisk=c("rw2", "rw2"), assocSurv=TRUE,
dataSurv = list(readmission,terminalData))
summary(M12)
##
## Survival outcome (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 0.0291 0.0173 0.0081 0.025 0.0745
##
## Frailty term variance (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 0.9113 0.3461 0.4192 0.849 1.7667
##
## Survival outcome (S2)
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 0.0127 0.0058 0.0049 0.0116 0.0272
##
## Association survival - survival
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.938 0.3073 1.3342 1.9377 2.5441
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -2491.607 -2491.607
##
## Deviance Information Criterion: 4756.345
## Widely applicable Bayesian information criterion: 4746.463
## Computation time: 2.2 seconds
# First I have to combine (back) the tfs and rfs data in one dataframe
surv_data <- merge(bcr_data_v3[, c('pseudoid', 'drfs_status', 'drfs_time')],
tfs_data[, c('pseudoid', 'tfs_status', 'tfs_time')], on='pseudoid', all.x=TRUE)
# New ID
surv_data$id <- 1:nrow(surv_data)
# Re-Order
surv_data <- surv_data[, c(6, 5, 4, 3, 2)]
# Remove where both time to treatment/bcr = 0 and event = 0 (this should be update when converting time to days)
all_null_tfs <- surv_data[surv_data$tfs_time==0 & surv_data$tfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs$id), ]
all_null_drfs <- surv_data[surv_data$drfs_time==0 & surv_data$drfs_status==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_drfs$id), ]
# Now if no BCR data then rfs status = 0 and rfs time = tfs time [to fill out the NULL cells]
surv_data$drfs_status <- ifelse(is.na(surv_data$drfs_status), 0, 0)
surv_data$drfs_time <- ifelse(is.na(surv_data$drfs_time), surv_data$tfs_time, surv_data$drfs_time)
# Remove when time to tx/bcr = 0
all_null_tfs_drfs <- surv_data[surv_data$tfs_time==0 & surv_data$drfs_time==0, ]
surv_data <- surv_data[!(surv_data$id %in% all_null_tfs_drfs$id), ]
M1 <- joint(formSurv =list(inla.surv(time = tfs_time, event = tfs_status) ~ -1),
basRisk = c("rw2"),
dataSurv = list(surv_data))
summary(M1)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.6014 0.2514 0.2529 0.5536 1.2299
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -5874.327 -5874.327
##
## Deviance Information Criterion: 11683.18
## Widely applicable Bayesian information criterion: 11683.43
## Computation time: 1.09 seconds
plot(M1)
## $Baseline
##
## attr(,"class")
## [1] "plot.INLAjoint" "list"
M2 <- joint(formSurv = list(inla.surv(time = drfs_time, event = drfs_status) ~ -1),
basRisk = c("rw2"),
dataSurv = list(surv_data))
summary(M2)
##
## Survival outcome
## mean sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance) 0.0284 0.0768 0 0.0058 0.2082
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -23.81622 -23.81622
##
## Deviance Information Criterion: 4.900577
## Widely applicable Bayesian information criterion: 3.413754
## Computation time: 1.31 seconds
plot(M2)$Baseline+scale_y_log10()
M12 <- joint(formSurv=list(inla.surv(time = tfs_time, event = tfs_status) ~ -1+(1|id),
inla.surv(time = drfs_time, event = drfs_status) ~ -1), id="id",
basRisk=c("rw2", "rw2"), assocSurv=TRUE, NbasRisk = 30,
dataSurv = list(surv_data))
summary(M12, hr=TRUE)
##
## Survival outcome (S1)
## exp(mean) sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S1 0.5965 0.2421 0.2565 0.552 1.1979
##
## Frailty term variance (S1)
## mean sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1 0.0177 0.0156 0.0027 0.0131 0.0607
##
## Survival outcome (S2)
## exp(mean) sd 0.025quant 0.5quant 0.975quant
## Baseline risk (variance)_S2 0.026 0.0669 1e-04 0.0058 0.1869
##
## Association survival - survival
## exp(mean) sd 0.025quant 0.5quant 0.975quant
## IDIntercept_S1_S2 1.6871 1.972 0.1447 1.0413 7.2437
##
## log marginal-likelihood (integration) log marginal-likelihood (Gaussian)
## -6943.721 -6943.721
##
## Deviance Information Criterion: 13579.76
## Widely applicable Bayesian information criterion: 13576.21
## Computation time: 3.73 seconds
plot(M12)$Baseline+scale_y_log10()