Setup

Load the required packages (install any that are missing first).

library(BEACON)
library(SummarizedExperiment)
library(ggplot2)
library(dplyr)

Loading the Data

BEACON provides curated bladder cancer datasets. Here we load the BLCA_BCAN_HCRN cohort — a clinical trial dataset with 61 patients and 58,385 genes.

# Load the full SummarizedExperiment object
# (rows are genes, columns are tumour samples; see the printed `dim` below)
HCRN <- BEACON::BLCA_BCAN_HCRN()

# Quick overview: printing the object lists its dimensions, assays, and the
# names of the row/column annotations it carries
HCRN
## class: SummarizedExperiment 
## dim: 58385 61 
## metadata(0):
## assays(1): expr
## rownames(58385): 1-Dec 1-Mar ... ZZEF1 ZZZ3
## rowData names(0):
## colnames(61): 5037-TUMOR 5116-TUMOR ... 8250-TUMOR 8726-TUMOR
## colData names(52): Sample_ID T_stage ... OS.ID OS.time
# Extract clinical metadata and expression matrix.
# Go through the public accessors instead of reading the S4 slot with `@`,
# so the code does not depend on the object's internal representation.
meta <- as.data.frame(SummarizedExperiment::colData(HCRN))
# Request the assay by name ("expr" is the only assay in this object) rather
# than relying on it being the first one.
expr <- SummarizedExperiment::assay(HCRN, "expr")

cat("Patients:", nrow(meta), "\n")
## Patients: 61
cat("Clinical variables:", ncol(meta), "\n")
## Clinical variables: 52
cat("Genes:", nrow(expr), "\n")
## Genes: 58385

Exploring Clinical Metadata

# List every clinical variable recorded for the cohort
names(meta)
##  [1] "Sample_ID"                                 
##  [2] "T_stage"                                   
##  [3] "N_Stage"                                   
##  [4] "Neoadjuvant_Therapy_"                      
##  [5] "Study_ID"                                  
##  [6] "Patient_ID"                                
##  [7] "BCAN_ID"                                   
##  [8] "ADC_._Immunotherapy"                       
##  [9] "ADC_Therapy"                               
## [10] "Adjuvant_Radiation_Therapy"                
## [11] "Adjuvant_Therapy"                          
## [12] "Age_at_Diagnosis"                          
## [13] "ECOG"                                      
## [14] "ADC_Therapy_Best_Response"                 
## [15] "ADC_._Immunotherapy_Best_Response"         
## [16] "Chemotherapy_Best_Response"                
## [17] "Chemotherapy_._Immunotherapy_Best_Response"
## [18] "Immunotherapy_Best_Response"               
## [19] "Targeted_Therapy_Best_Response"            
## [20] "Cancer_Type"                               
## [21] "Cancer_Type_Detailed"                      
## [22] "Chemotherapy_"                             
## [23] "Chemotherapy_._Immunotherapy"              
## [24] "Survival_with_Chemotherapy"                
## [25] "Concurrent_Chemoradiation"                 
## [26] "Reason_for_Death"                          
## [27] "First_Treatment"                           
## [28] "X_Immunotherapy"                           
## [29] "Survival_with_Immunotherapy"               
## [30] "Metastatic_Radiation_Therapy"              
## [31] "Mutation_Count"                            
## [32] "Therapy_prior_to_NGS"                      
## [33] "Oncotree_Code"                             
## [34] "Sample_Type"                               
## [35] "Primary_Radiation_Therapy"                 
## [36] "Primary_Surgery"                           
## [37] "Histology"                                 
## [38] "M_Stage"                                   
## [39] "Variant_Histology"                         
## [40] "Primary_Tumor_Location"                    
## [41] "Race"                                      
## [42] "Number_of_Samples_Per_Patient"             
## [43] "Sample_Site"                               
## [44] "Sex"                                       
## [45] "Smoking_Status"                            
## [46] "Surgery_for_Advanced_Disease"              
## [47] "Survival_Status"                           
## [48] "Survival_Time"                             
## [49] "Metastatic_Systemic_Therapy_"              
## [50] "Targeted_Therapy"                          
## [51] "OS.ID"                                     
## [52] "OS.time"
# Peek at the first rows of the key staging, demographic, and survival columns
preview_cols <- c(
  "Sample_ID", "T_stage", "N_Stage", "M_Stage",
  "Age_at_Diagnosis", "Sex", "Survival_Status", "Survival_Time"
)
head(meta[, preview_cols])
##             Sample_ID T_stage N_Stage M_Stage Age_at_Diagnosis    Sex
## 5037-TUMOR 5037-TUMOR     T4a      N3      MX               60   Male
## 5116-TUMOR 5116-TUMOR     T4a      N2      MX               71   Male
## 5123-TUMOR 5123-TUMOR     T3a      N1      MX               73 Female
## 5172-TUMOR 5172-TUMOR      T3      N3      M1               74   Male
## 5446-TUMOR 5446-TUMOR      T3      N2      MX               76 Female
## 6026-TUMOR 6026-TUMOR     T4a      N2      MX               67   Male
##            Survival_Status Survival_Time
## 5037-TUMOR            Dead         33.25
## 5116-TUMOR         Unknown            NA
## 5123-TUMOR           Alive         53.06
## 5172-TUMOR            Dead         20.37
## 5446-TUMOR           Alive         25.00
## 6026-TUMOR            Dead         12.42

Visualisations

M Stage — Pie Chart

# Fill colours keyed by the normalised M stage labels
# ("NA" must stay quoted: it is a label here, not the missing-value constant)
mstage_colors <- c(
  M0 = "#4E79A7", M1 = "#E15759", MX = "#EDC948",
  unknown = "#999999", "NA" = "#CCCCCC"
)
# Map raw M stage values onto a fixed set of display labels.
#
# x:       vector of raw M stage values (character or factor).
# returns: character vector the same length as `x`, containing one of
#          "M0", "M1", "MX", "unknown", or the literal string "NA".
#
# Matching ignores case and surrounding whitespace. Missing or blank input is
# labelled "NA"; any other unrecognised value is labelled "unknown".
normalize_mstage <- function(x) {
  key <- trimws(tolower(as.character(x)))
  lookup <- c(m0 = "M0", m1 = "M1", mx = "MX", unknown = "unknown")

  # Unmatched keys (including NA and "") come back as NA from the lookup
  labels <- unname(lookup[match(key, names(lookup))])
  labels[is.na(labels)] <- "unknown"

  # Missing/blank input overrides the generic "unknown" fallback
  labels[is.na(x) | key %in% ""] <- "NA"
  labels
}
# Tally patients per normalised M stage category
mstage_data <- count(meta, MStage = normalize_mstage(M_Stage))

# Pie chart: one stacked bar wrapped into polar coordinates, with the
# per-slice counts placed just outside the rim (bar spans x = 0.5 to 1.5)
ggplot(mstage_data, aes(x = "", y = n, fill = MStage)) +
  geom_col(width = 1, colour = "white", linewidth = 0.5) +
  geom_text(
    aes(x = 1.65, label = n),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = mstage_colors) +
  ggtitle("Clinical M Stage") +
  theme_void() +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 13, hjust = 0.5)
  )
Distribution of M stage across BLCA_BCAN_HCRN patients

Distribution of M stage across BLCA_BCAN_HCRN patients

T Stage - Pie Chart

# Fill colours keyed by the normalised T stage labels
tstage_colors <- c(
  Ta = "#A0CBE8",
  T1 = "#4E79A7",
  T2 = "#F28E2B",
  T3 = "#E15759",
  T4 = "#B07AA1"
)

# Collapse raw T stage values to their main category (Ta, T1, T2, T3, T4).
#
# x:       vector of raw T stage values (character or factor), e.g. "T4a".
# returns: character vector the same length as `x`; values that do not start
#          with one of the five recognised prefixes (and missing values)
#          become NA_character_.
#
# Matching ignores case and sub-stage suffixes ("T3a" -> "T3"). Surrounding
# whitespace is trimmed first, in line with the M and N stage normalisers, so
# a padded value such as " T3" is no longer silently turned into NA.
normalize_tstage <- function(x) {
  x_clean <- trimws(tolower(as.character(x)))
  stages <- c(ta = "Ta", t1 = "T1", t2 = "T2", t3 = "T3", t4 = "T4")

  # Only the first two characters decide the category; anything after them
  # (sub-stage letters etc.) is ignored
  prefix <- substr(x_clean, 1L, 2L)
  unname(stages[match(prefix, names(stages))])
}

# Tally patients per T stage category, dropping values that could not be
# mapped to a category
stage_data <- meta |>
  count(Stage = normalize_tstage(T_stage)) |>
  filter(!is.na(Stage))

# Pie chart: stacked bar in polar coordinates with counts outside the rim
ggplot(stage_data, aes(x = "", y = n, fill = Stage)) +
  geom_col(width = 1, colour = "white", linewidth = 0.5) +
  geom_text(
    aes(x = 1.65, label = n),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = tstage_colors) +
  ggtitle("Clinical T Stage") +
  theme_void() +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 13, hjust = 0.5)
  )
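Distribution of T stage across BLCA_BCAN_HCRN patients. The BLCA_BCAN_HCRN cohort consists entirely of patients with muscle-invasive disease; in this cohort only T3 and T4 stages are represented. Samples whose T stage cannot be mapped to one of the Ta–T4 categories are omitted from the chart.

Distribution of T stage across BLCA_BCAN_HCRN patients. The BLCA_BCAN_HCRN cohort consists entirely of patients with muscle-invasive disease; in this cohort only T3 and T4 stages are represented. Samples whose T stage cannot be mapped to one of the Ta–T4 categories are omitted from the chart.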

N Stage - Pie Chart

# Fill colours keyed by the normalised N stage labels
# ("N+" and "NA" need quotes because they are not syntactic names)
nstage_colors <- c(
  "N+"    = "#76B7B2",
  N0      = "#4E79A7",
  N1      = "#F28E2B",
  N2      = "#E15759",
  N3      = "#B07AA1",
  Nx      = "#EDC948",
  unknown = "#999999",
  "NA"    = "#CCCCCC"
)

# Map raw N stage values onto a fixed set of display labels.
#
# x:       vector of raw N stage values (character or factor).
# returns: character vector the same length as `x`, containing one of
#          "N+", "N0", "N1", "N2", "N3", "Nx", "unknown", or the literal
#          string "NA".
#
# Matching ignores case and surrounding whitespace. Missing or blank input is
# labelled "NA"; any other unrecognised value is labelled "unknown".
normalize_nstage <- function(x) {
  key <- trimws(tolower(as.character(x)))
  lookup <- c(
    "n+" = "N+", n0 = "N0", n1 = "N1", n2 = "N2", n3 = "N3", nx = "Nx",
    unknown = "unknown"
  )

  # Unmatched keys (including NA and "") come back as NA from the lookup
  labels <- unname(lookup[match(key, names(lookup))])
  labels[is.na(labels)] <- "unknown"

  # Missing/blank input overrides the generic "unknown" fallback
  labels[is.na(x) | key %in% ""] <- "NA"
  labels
}

# Tally patients per normalised N stage category
nstage_data <- count(meta, NStage = normalize_nstage(N_Stage))

# Pie chart: stacked bar in polar coordinates with counts outside the rim
ggplot(nstage_data, aes(x = "", y = n, fill = NStage)) +
  geom_col(width = 1, colour = "white", linewidth = 0.5) +
  geom_text(
    aes(x = 1.65, label = n),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = nstage_colors) +
  ggtitle("Clinical N Stage") +
  theme_void() +
  theme(
    legend.position = "right",
    plot.title = element_text(size = 13, hjust = 0.5)
  )

library(survival)
library(survminer)

# Kaplan-Meier overall survival, stratified by nodal involvement

# Derive the three analysis columns from the clinical metadata
surv_data <- mutate(
  meta,
  # Binary nodal status: N0 vs. N1/N2/N3/N+; any other value becomes NA
  Nodal = case_when(
    grepl("^n0", N_Stage, ignore.case = TRUE) ~ "Node Negative",
    grepl("^n1|^n2|^n3|^n\\+", N_Stage, ignore.case = TRUE) ~ "Node Positive",
    TRUE ~ NA_character_
  ),
  time = as.numeric(Survival_Time),
  # Event indicator: 1 = dead, 0 = alive; any other status becomes NA
  status = case_when(
    tolower(Survival_Status) == "dead" ~ 1,
    tolower(Survival_Status) == "alive" ~ 0,
    TRUE ~ NA_real_
  )
)

# Keep only patients with known nodal status, follow-up time, and vital
# status, and with a strictly positive follow-up time
surv_data <- filter(
  surv_data,
  !is.na(Nodal) & !is.na(time) & !is.na(status) & time > 0
)

fit <- survfit(Surv(time, status) ~ Nodal, data = surv_data)

# Survival curves with a p-value annotation and a numbers-at-risk table
ggsurvplot(
  fit,
  data = surv_data,
  title = "Overall Survival by Nodal Status",
  xlab = "Time (months)",
  ylab = "Overall Survival Probability",
  legend.title = "Nodal Status",
  palette = c("#4E79A7", "#E15759"),
  pval = TRUE,
  conf.int = FALSE,
  risk.table = TRUE,
  risk.table.height = 0.25,
  ggtheme = theme_minimal(base_size = 12)
)


## Session Info


``` r
sessionInfo()
## R version 4.4.3 (2025-02-28)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] survminer_0.5.1             ggpubr_0.6.1               
##  [3] survival_3.8-3              dplyr_1.1.4                
##  [5] ggplot2_3.5.2               SummarizedExperiment_1.36.0
##  [7] Biobase_2.66.0              GenomicRanges_1.58.0       
##  [9] GenomeInfoDb_1.42.3         IRanges_2.40.1             
## [11] S4Vectors_0.44.0            BiocGenerics_0.52.0        
## [13] MatrixGenerics_1.18.1       matrixStats_1.5.0          
## [15] BEACON_0.0.0.9000          
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1        farver_2.1.2            fastmap_1.2.0          
##  [4] digest_0.6.39           lifecycle_1.0.5         magrittr_2.0.4         
##  [7] compiler_4.4.3          rlang_1.1.7             sass_0.4.10            
## [10] tools_4.4.3             yaml_2.3.10             data.table_1.17.2      
## [13] knitr_1.50              ggsignif_0.6.4          S4Arrays_1.6.0         
## [16] labeling_0.4.3          bit_4.6.0               DelayedArray_0.32.0    
## [19] xml2_1.4.1              RColorBrewer_1.1-3      abind_1.4-8            
## [22] withr_3.0.2             purrr_1.0.4             grid_4.4.3             
## [25] xtable_1.8-4            scales_1.4.0            cli_3.6.5              
## [28] rmarkdown_2.29          crayon_1.5.3            generics_0.1.4         
## [31] rstudioapi_0.17.1       km.ci_0.5-6             httr_1.4.7             
## [34] tzdb_0.5.0              commonmark_2.0.0        cachem_1.1.0           
## [37] stringr_1.5.1           zlibbioc_1.52.0         splines_4.4.3          
## [40] parallel_4.4.3          XVector_0.46.0          survMisc_0.5.6         
## [43] vctrs_0.7.1             Matrix_1.7-2            jsonlite_2.0.0         
## [46] carData_3.0-5           litedown_0.9            car_3.1-3              
## [49] bit64_4.6.0-1           rstatix_0.7.2           Formula_1.2-5          
## [52] jquerylib_0.1.4         tidyr_1.3.1             glue_1.8.0             
## [55] ggtext_0.1.2            stringi_1.8.7           gtable_0.3.6           
## [58] UCSC.utils_1.2.0        tibble_3.3.1            pillar_1.11.1          
## [61] rappdirs_0.3.4          htmltools_0.5.8.1       GenomeInfoDbData_1.2.13
## [64] R6_2.6.1                KMsurv_0.1-6            vroom_1.7.0            
## [67] evaluate_1.0.3          lattice_0.22-6          markdown_2.0           
## [70] backports_1.5.0         gridtext_0.1.5          broom_1.0.10           
## [73] bslib_0.9.0             Rcpp_1.0.14             gridExtra_2.3          
## [76] SparseArray_1.6.2       xfun_0.55               zoo_1.8-15             
## [79] pkgconfig_2.0.3