# Requirements

library(tidyverse)
library(data.table)
# Root directory of the delivery under review. The trailing slash is part of
# the value: one later path is built with paste0() without a leading slash.
folder <- "/stash/data/clin/external-collaboration/tempus/P-20251203-0001/delivery_20251205/"
# Agent name searched for (via grepl) in the `agents` column of the care plans.
drug_name <- "lutetium lu 177 vipivotide tetraxetan"

# Total patients ----
# Load the patient table and keep the distinct patient identifiers.
onco_patient <- read_csv(
  paste0(folder, "/data/Clinical/onco_patient.csv"),
  show_col_types = FALSE
)
patients_all <- unique(onco_patient$patient_id)
print(paste("Total Number of Patients:", length(patients_all)))
## [1] "Total Number of Patients: 211"
# Patients with a full diagnosis date (YYYY-MM-DD, not a year-only value) ----
onco_diagnosis <- read_csv(
  paste0(folder, "/data/Clinical/onco_diagnosis.csv"),
  show_col_types = FALSE
)

# Keep only the diagnosis rows whose date matches the full YYYY-MM-DD pattern.
patients_diagnosis <- dplyr::filter(
  onco_diagnosis,
  grepl("^\\d{4}-\\d{2}-\\d{2}$", initial_diagnosis_date)
)
print(paste(
  "Patients with a diagnosis date:",
  length(unique(patients_diagnosis$patient_id))
))
## [1] "Patients with a diagnosis date: 195"

# Patients for whom no diagnosis row carries a full date.
rejected_diagnosis <- setdiff(patients_all, patients_diagnosis$patient_id)
print("List of Patients without a diagnosis date:")
## [1] "List of Patients without a diagnosis date:"

# Diagnosis rows of the rejected patients, shown with a CSV export button.
df <- onco_diagnosis %>%
  dplyr::select(patient_id, initial_diagnosis_date) %>%
  filter(patient_id %in% rejected_diagnosis)
DT::datatable(
  df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# Patients with a full RNA biopsy date (YYYY-MM-DD, not a year-only value) ----
onco_meta_biospecimen <- read_csv(
  paste0(folder, "/data/Group_Level_Molecular//onco_meta_biospecimen.csv"),
  show_col_types = FALSE
)

# Biospecimens whose polymer list mentions RNA and whose collection date is a
# full date; each kept row is tagged with has_RNA_date = TRUE.
patients_biopsy <- onco_meta_biospecimen %>%
  dplyr::filter(
    grepl("RNA", polymer_list),
    grepl("^\\d{4}-\\d{2}-\\d{2}$", collection_date)
  ) %>%
  mutate(has_RNA_date = TRUE)
print(paste(
  "Patient with a RNA biopsy date:",
  length(unique(patients_biopsy$patient_id))
))
## [1] "Patient with a RNA biopsy date: 211"

# Patients that have no such biospecimen, and all of their biospecimen rows.
no_rna_date <- setdiff(patients_all, patients_biopsy$patient_id)
df <- onco_meta_biospecimen %>%
  dplyr::select(patient_id, collection_date) %>%
  filter(patient_id %in% no_rna_date)
print("List of Patients without a RNA biopsy date:")
## [1] "List of Patients without a RNA biopsy date:"
DT::datatable(
  df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# Drug with date ----
onco_care_plan <- read_csv(
  paste0(folder, "/data/Clinical/onco_care_plan.csv"),
  show_col_types = FALSE
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)

# Keep the care-plan rows whose agents mention the drug and flag whether the
# start date is a full YYYY-MM-DD date. grepl() never returns NA, so the flag
# is always TRUE or FALSE.
drug_date <- onco_care_plan %>%
  dplyr::filter(grepl(drug_name, agents)) %>%
  dplyr::mutate(
    drug_has_date = grepl("^\\d{4}-\\d{2}-\\d{2}$", start_date)
  )

# TRUE/FALSE breakdown, kept for inspection.
output <- table(drug_date$drug_has_date, useNA = "always")
# Count the TRUE rows directly rather than indexing the table by position:
# table() drops absent levels, so with no FALSE row `output[2]` would be the
# <NA> count instead of the TRUE count.
print(paste("Pluvicto with a date:", sum(drug_date$drug_has_date)))
## [1] "Pluvicto with a date: 212"

# Drug rows lacking a full start date, shown with a CSV export button.
df <- drug_date %>%
  filter(!drug_has_date) %>%
  dplyr::select(care_plan_id, patient_id, agents, start_date)
print("List of Pluvicto without a date:")
## [1] "List of Pluvicto without a date:"
DT::datatable(
  df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = c("csv")
  )
)
# Criterion: the regimen containing Pluvicto occurs either directly before or
# directly after the biopsy (within X days, with no other regimen occurring
# between Pluvicto and the biopsy).
# All regimens can be grouped together using care_plan_id.

# Care-plan rows whose agents mention the drug and whose therapy class does
# not mention "Radiotherapy".
drug_plans <- onco_care_plan %>%
  dplyr::filter(
    grepl(drug_name, agents),
    !grepl("Radiotherapy", therapy_class)
  ) %>%
  dplyr::select(
    care_plan_id, patient_id, regimen_id, agents,
    start_date_indexed, end_date_indexed
  )

# Care-plan rows that do not mention the drug and that carry a regimen id.
non_drug_plans <- onco_care_plan %>%
  dplyr::filter(
    !grepl(drug_name, agents),
    !is.na(regimen_id)
  ) %>%
  dplyr::select(
    care_plan_id, patient_id, regimen_id, agents,
    start_date_indexed, end_date_indexed
  )

# RNA biospecimens whose indexed collection date is a full YYYY-MM-DD date.
biopsis <- onco_meta_biospecimen %>%
  dplyr::filter(
    grepl("RNA", polymer_list),
    grepl("^\\d{4}-\\d{2}-\\d{2}$", collection_date_indexed)
  ) %>%
  dplyr::select(meta_biospecimen_id, patient_id, collection_date_indexed)
# Combine drug regimens, non-drug regimens, RNA biopsies and diagnosis dates
# per patient. Every join is a many-to-many full join on patient_id, so each
# drug row is paired with every non-drug row, every biopsy and every diagnosis
# row of the same patient, and patients missing from one side are kept with NA.
# Columns shared by drug_plans and non_drug_plans receive the suffixes
# .x (drug regimen) and .y (non-drug regimen).
joined <- full_join(drug_plans, non_drug_plans, by=c( "patient_id"), relationship = "many-to-many") %>%
  full_join(biopsis, by=c( "patient_id"), relationship = "many-to-many") %>% 
  full_join(onco_diagnosis %>% dplyr::select(patient_id, initial_diagnosis_date_indexed), by=c( "patient_id"), relationship = "many-to-many") 
  
# Fill in missing regimen dates so the date comparisons below are defined
joined <- joined %>% 
  # Missing start dates fall back to the indexed initial diagnosis date;
  # missing end dates fall back to today() (presumably lubridate's, attached
  # through tidyverse - confirm), i.e. the regimen is treated as still open.
  # NOTE(review): the .y fill also applies to rows that have no non-drug
  # regimen at all (all .y columns NA after the full join) - confirm intended.
  mutate(
         start_date_indexed.x = if_else(is.na(start_date_indexed.x), initial_diagnosis_date_indexed, start_date_indexed.x),
         end_date_indexed.x = if_else(is.na(end_date_indexed.x), today(), end_date_indexed.x),
         start_date_indexed.y = if_else(is.na(start_date_indexed.y), initial_diagnosis_date_indexed, start_date_indexed.y),
         end_date_indexed.y = if_else(is.na(end_date_indexed.y), today(), end_date_indexed.y),
         ) 
# Biopsies taken after the start of Pluvicto: find the rows where another
# regimen starts between Pluvicto (its start or its end) and the biopsy.
patients_post_drug <- joined %>%
  dplyr::filter(
    collection_date_indexed > start_date_indexed.x,
    between(start_date_indexed.y, start_date_indexed.x, collection_date_indexed) |
      between(start_date_indexed.y, end_date_indexed.x, collection_date_indexed)
  ) %>%
  dplyr::select(
    patient_id, meta_biospecimen_id,
    regimen_id.x, regimen_id.y,
    agents.x, agents.y,
    start_date_indexed.x, end_date_indexed.x,
    start_date_indexed.y, end_date_indexed.y,
    collection_date_indexed
  )

# Table to display: the same rows without the biospecimen identifier.
df <- dplyr::select(patients_post_drug, -meta_biospecimen_id)

# Patients rejected because a regimen begins between Pluvicto and the biopsy.
rejected_regimen_between <- unique(df$patient_id)
print(paste(
  "Patients with biopsy post-Pluvicto that have a regimen that begins between Pluvicto and biopsy:",
  length(rejected_regimen_between)
))
## [1] "Patients with biopsy post-Pluvicto that have a regimen that begins between Pluvicto and biopsy: 25"
print("List of biopsies post-Pluvicto that have a regimen that begins between Pluvicto and the biopsy:")
## [1] "List of biopsies post-Pluvicto that have a regimen that begins between Pluvicto and the biopsy:"
DT::datatable(
  df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = "csv"
  )
)
# Number of patients that were previously delivered ----
# The legacy mapping has the columns id_field, id_value and legacy_value.
legacy_mapping <- read_csv(paste0(folder, "docs/Data_Model/legacy_ID_mapping_20251204.csv"))
## Rows: 1122 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): id_field, id_value, legacy_value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Patients whose id appears among the mapped id values. The count and the
# listing below are both derived from this one vector so they cannot disagree.
# NOTE(review): id_value is matched regardless of id_field - confirm that
# patient ids cannot collide with other id types in the mapping.
previously_delivered <- onco_patient$patient_id[
  onco_patient$patient_id %in% legacy_mapping$id_value
]
print(paste("Number of patients previously delivered:", length(previously_delivered)))
## [1] "Number of patients previously delivered: 186"
print("Patients previously delivered:")
## [1] "Patients previously delivered:"

# Name the column explicitly: piping a bare vector into as.data.frame() gives
# a column called ".", which would become the table and CSV header.
df <- data.frame(patient_id = previously_delivered)
DT::datatable(
  df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = c("csv")
  )
)
# Final rejection list ----
# A patient is rejected when no diagnosis row has a full date, or when another
# regimen begins between Pluvicto and the biopsy.
# NOTE(review): patients without a full RNA biopsy date (no_rna_date) and drug
# rows without a full start date are not part of this union - confirm intended.
rejected_patients <- union(rejected_diagnosis, rejected_regimen_between)
print(paste("Total Rejected Patients:", length(rejected_patients)))
## [1] "Total Rejected Patients: 38"

# Name the column explicitly: piping a bare vector into as.data.frame() gives
# a column called ".", which would become the table and CSV header.
df <- data.frame(patient_id = rejected_patients)
DT::datatable(
  df,
  extensions = "Buttons",
  options = list(
    pageLength = nrow(df),
    scrollX = FALSE,
    scrollY = FALSE,
    paging = FALSE,
    dom = "Bfrtip",
    buttons = c("csv")
  )
)