Requirements
- Prostate cancer diagnosis with date
- RNA biopsy with date
- Pluvicto with date (using the onco_care_plan table) – generic med
name is lutetium lu 177 vipivotide tetraxetan
- For patients with biopsy post-Pluvicto, please identify patients
that have a regimen that begins between Pluvicto and biopsy; we will
need counts for this in early January. Note: for the purposes of the QC,
you can remove all radiotherapy instances; they should not be used to
determine if a regimen is flanking a biopsy.
- Number of patients that were previously delivered (this can be found
in docs/Data_Model/legacy_ID_mapping; you can match the ID to the
patients delivered in the prior Prostate intermediate files)
library(tidyverse)
library(data.table)
# Generic medication name used to recognise Pluvicto records, and the root
# folder of the delivery being checked.
drug_name <- "lutetium lu 177 vipivotide tetraxetan"
folder <- "/stash/data/clin/external-collaboration/tempus/P-20251203-0001/delivery_20251205/"

# Total Patients
# Load the patient table and keep the distinct patient identifiers.
onco_patient <- read_csv(
  paste0(folder, "/data/Clinical/onco_patient.csv"),
  show_col_types = FALSE
)
patients_all <- unique(onco_patient$patient_id)
print(paste("Total Number of Patients:", length(patients_all)))
## [1] "Total Number of Patients: 211"
# Patients with a diagnosis date (not a year-only date)
# A diagnosis date only counts when it is a complete YYYY-MM-DD value.
onco_diagnosis <- read_csv(
  paste0(folder, "/data/Clinical/onco_diagnosis.csv"),
  show_col_types = FALSE
)
patients_diagnosis <- dplyr::filter(
  onco_diagnosis,
  grepl("^\\d{4}-\\d{2}-\\d{2}$", initial_diagnosis_date)
)
print(paste(
  "Patients with a diagnosis date:",
  length(unique(patients_diagnosis$patient_id))
))
## [1] "Patients with a diagnosis date: 195"

# Cohort patients that have no diagnosis row with a complete date.
rejected_diagnosis <- setdiff(patients_all, patients_diagnosis$patient_id)
print("List of Patients without a diagnosis date:")
## [1] "List of Patients without a diagnosis date:"

# Raw diagnosis dates of the rejected patients, shown in a table with a CSV
# export button. Only patients that have a row in onco_diagnosis can appear.
df <- onco_diagnosis %>%
  dplyr::select(patient_id, initial_diagnosis_date) %>%
  dplyr::filter(patient_id %in% rejected_diagnosis)
DT::datatable(
  data = df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# Patient with a RNA biopsy date (not a year-only date)
# Keep biospecimens whose polymer_list mentions RNA and whose collection_date
# is a complete YYYY-MM-DD value.
onco_meta_biospecimen <- read_csv(
  paste0(folder, "/data/Group_Level_Molecular//onco_meta_biospecimen.csv"),
  show_col_types = FALSE
)
patients_biopsy <- onco_meta_biospecimen %>%
  dplyr::filter(
    grepl("RNA", polymer_list),
    grepl("^\\d{4}-\\d{2}-\\d{2}$", collection_date)
  ) %>%
  mutate(has_RNA_date = TRUE)

# Cohort patients without any fully dated RNA biospecimen.
no_rna_date <- setdiff(patients_all, patients_biopsy$patient_id)
print(paste(
  "Patient with a RNA biopsy date:",
  length(unique(patients_biopsy$patient_id))
))
## [1] "Patient with a RNA biopsy date: 211"

# All biospecimen collection dates recorded for those patients, shown in a
# table with a CSV export button.
df <- onco_meta_biospecimen %>%
  dplyr::select(patient_id, collection_date) %>%
  dplyr::filter(patient_id %in% no_rna_date)
print("List of Patients without a RNA biopsy date:")
## [1] "List of Patients without a RNA biopsy date:"
DT::datatable(
  data = df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# Drug with date
# QC of the Pluvicto care-plan records: every record whose `agents` mentions
# the drug is flagged by whether its `start_date` is a complete YYYY-MM-DD
# value. Counts below are per care-plan record, not per patient.
onco_care_plan <- read_csv(paste0(folder, "/data/Clinical/onco_care_plan.csv"), show_col_types = FALSE)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# NOTE(review): the recorded run reported parsing issues for this file; check
# problems(onco_care_plan) to confirm they do not affect agents or start_date.

# Keep only Pluvicto records, then flag the ones with a complete start date
# (TRUE = dated, FALSE = missing or partial date).
drug_date <- onco_care_plan %>%
  dplyr::filter(grepl(drug_name, agents)) %>%
  dplyr::mutate(drug_has_date = grepl("^\\d{4}-\\d{2}-\\d{2}$", start_date))

# Tabulate with fixed levels so FALSE and TRUE are always present, and read
# the count by name. Indexing a plain table() by position returns the NA
# count instead of the TRUE count whenever no record is flagged FALSE.
output <- table(
  factor(drug_date$drug_has_date, levels = c(FALSE, TRUE)),
  useNA = "always"
)
print(paste("Pluvicto with a date:", output[["TRUE"]]))
## [1] "Pluvicto with a date: 212"

# Pluvicto records lacking a complete start date, shown in a table with a CSV
# export button.
df <- drug_date %>%
  dplyr::filter(!drug_has_date) %>%
  dplyr::select(care_plan_id, patient_id, agents, start_date)
print("List of Pluvicto without a date:")
## [1] "List of Pluvicto without a date:"
DT::datatable(
  data = df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# - The regimen containing the Pluvicto occurs either directly before or directly
# after the biopsy (it should be within X days, with no other regimens occurring between Pluvicto and biopsy).
# All regimens can be grouped together using care_plan_id
# Extract drug-only regimens
# Care-plan rows whose agents mention the drug and whose therapy_class does
# not mention Radiotherapy, reduced to the columns needed for the regimen
# comparison.
# NOTE(review): if any Pluvicto rows are classed as Radiotherapy they are
# dropped here — confirm that is intended.
drug_plans <- onco_care_plan %>%
  dplyr::filter(grepl(drug_name, agents)) %>%
  dplyr::filter(!grepl("Radiotherapy", therapy_class)) %>%
  dplyr::select(
    care_plan_id,
    patient_id,
    regimen_id,
    agents,
    start_date_indexed,
    end_date_indexed
  )
# Extract non-drug regimens
# Comparison regimens: care-plan rows that do not mention the drug and that
# carry a regimen_id. Radiotherapy rows are removed because, per the QC
# requirements, radiotherapy must not be used to decide whether a regimen
# falls between Pluvicto and the biopsy.
# NOTE(review): counts recorded in the output comments further down were
# produced before this exclusion was added; re-run to refresh them.
non_drug_plans <- onco_care_plan %>%
  dplyr::filter(
    !grepl(drug_name, agents),
    !is.na(regimen_id),
    !grepl("Radiotherapy", therapy_class)
  ) %>%
  dplyr::select(care_plan_id, patient_id, regimen_id, agents, start_date_indexed, end_date_indexed)
# Extract relevant biopsies
# RNA biospecimens whose indexed collection date is a complete YYYY-MM-DD
# value, keeping only the identifiers and that date.
biopsis <- onco_meta_biospecimen %>%
  dplyr::filter(
    grepl("RNA", polymer_list),
    grepl("^\\d{4}-\\d{2}-\\d{2}$", collection_date_indexed)
  ) %>%
  dplyr::select(meta_biospecimen_id, patient_id, collection_date_indexed)
# Merge everything together
# Per patient, pair every Pluvicto regimen (columns suffixed .x) with every
# other regimen (suffixed .y), every dated RNA biopsy and every diagnosis row.
# Full joins keep patients that are absent from any one of the tables.
joined <- drug_plans %>%
  full_join(
    non_drug_plans,
    by = "patient_id",
    relationship = "many-to-many"
  ) %>%
  full_join(
    biopsis,
    by = "patient_id",
    relationship = "many-to-many"
  ) %>%
  full_join(
    dplyr::select(onco_diagnosis, patient_id, initial_diagnosis_date_indexed),
    by = "patient_id",
    relationship = "many-to-many"
  )
# Set some inclusions flags
# Fill in missing regimen dates so the comparisons below never see NA:
# - a missing start date falls back to the patient's indexed diagnosis date;
# - a missing end date falls back to today's date (treated as still ongoing).
# today() is not base R — presumably lubridate's, attached via tidyverse.
joined <- joined %>%
  mutate(
    start_date_indexed.x = coalesce(start_date_indexed.x, initial_diagnosis_date_indexed),
    end_date_indexed.x = coalesce(end_date_indexed.x, today()),
    start_date_indexed.y = coalesce(start_date_indexed.y, initial_diagnosis_date_indexed),
    end_date_indexed.y = coalesce(end_date_indexed.y, today())
  )
# For patients with biopsy post-Pluvicto,
# identify patients that have a regimen that begins between Pluvicto and biopsy
#
# A row is kept when:
#   1. the biopsy was collected after the Pluvicto regimen started, and
#   2. the other regimen (.y) starts no later than the biopsy, and
#   3. the other regimen starts on or after the Pluvicto start or the
#      Pluvicto end (bounds are inclusive).
# The comparisons are written out instead of calling between(): data.table is
# attached after tidyverse, so a bare between() resolves to data.table's
# version, whose handling of NA bounds differs from dplyr's. Explicit
# comparisons give NA for missing dates, and filter() drops those rows.
patients_post_drug <- joined %>%
  dplyr::select(
    patient_id, meta_biospecimen_id, regimen_id.x, regimen_id.y,
    agents.x, agents.y,
    start_date_indexed.x, end_date_indexed.x,
    start_date_indexed.y, end_date_indexed.y,
    collection_date_indexed
  ) %>%
  dplyr::filter(
    collection_date_indexed > start_date_indexed.x &
      start_date_indexed.y <= collection_date_indexed &
      (start_date_indexed.y >= start_date_indexed.x |
        start_date_indexed.y >= end_date_indexed.x)
  )
# Report the flagged Pluvicto / other-regimen / biopsy combinations; the
# biospecimen identifier is dropped from the displayed table.
df <- dplyr::select(patients_post_drug, -meta_biospecimen_id)
print(paste(
  "Patients with biopsy post-Pluvicto that have a regimen that begins between Pluvicto and biopsy:",
  length(unique(df$patient_id))
))
## [1] "Patients with biopsy post-Pluvicto that have a regimen that begins between Pluvicto and biopsy: 25"

# Distinct patients with at least one flagged combination.
rejected_regimen_between <- unique(df$patient_id)
print("List of biopsies post-Pluvicto that have a regimen that begins between Pluvicto and the biopsy:")
## [1] "List of biopsies post-Pluvicto that have a regimen that begins between Pluvicto and the biopsy:"
DT::datatable(
  data = df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# Number of patients that were previously delivered
# The legacy mapping file has three character columns: id_field, id_value and
# legacy_value (1122 rows in the recorded run). A patient counts as previously
# delivered when their patient_id appears in id_value.
# NOTE(review): id_value is matched regardless of id_field — confirm that
# non-patient identifiers in the mapping cannot collide with patient_id.
legacy_mapping <- read_csv(
  paste0(folder, "docs/Data_Model/legacy_ID_mapping_20251204.csv"),
  show_col_types = FALSE
)

# intersect() returns distinct values, so a patient listed more than once in
# onco_patient is only counted once.
previously_delivered <- intersect(onco_patient$patient_id, legacy_mapping$id_value)
print(paste("Number of patients previously delivered:", length(previously_delivered)))
## [1] "Number of patients previously delivered: 186"
print("Patients previously delivered:")
## [1] "Patients previously delivered:"

# One named column so the CSV export has a meaningful header.
df <- data.frame(patient_id = previously_delivered)
DT::datatable(
  data = df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# Final rejection list: patients without a complete diagnosis date plus
# patients with a regimen starting between Pluvicto and the biopsy.
# NOTE(review): patients without a dated RNA biopsy (no_rna_date) and Pluvicto
# records without a date are not part of this union — confirm that is intended.
rejected_patients <- union(rejected_diagnosis, rejected_regimen_between)
print(paste("Total Rejected Patients:", length(rejected_patients)))
## [1] "Total Rejected Patients: 38"

# Piping a bare vector into as.data.frame() yields a single column named ".".
df <- rejected_patients %>% as.data.frame()
DT::datatable(
  data = df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)