Install and load the required packages.
BEACON provides curated bladder cancer datasets. Here we load the BLCA_BCAN_HCRN cohort — a clinical trial dataset with 61 patients and 58,385 genes.
## class: SummarizedExperiment
## dim: 58385 61
## metadata(0):
## assays(1): expr
## rownames(58385): 1-Dec 1-Mar ... ZZEF1 ZZZ3
## rowData names(0):
## colnames(61): 5037-TUMOR 5116-TUMOR ... 8250-TUMOR 8726-TUMOR
## colData names(52): Sample_ID T_stage ... OS.ID OS.time
# Extract clinical metadata and expression matrix.
# Use the colData() accessor rather than reading the S4 slot directly, so the
# code depends only on the documented SummarizedExperiment interface.
meta <- as.data.frame(SummarizedExperiment::colData(HCRN))
expr <- SummarizedExperiment::assay(HCRN)

# Report cohort dimensions: samples are rows of `meta`, genes are rows of `expr`
cat("Patients:", nrow(meta), "\n")
## Patients: 61
cat("Clinical variables:", ncol(meta), "\n")
## Clinical variables: 52
cat("Genes:", nrow(expr), "\n")
## Genes: 58385
## [1] "Sample_ID"
## [2] "T_stage"
## [3] "N_Stage"
## [4] "Neoadjuvant_Therapy_"
## [5] "Study_ID"
## [6] "Patient_ID"
## [7] "BCAN_ID"
## [8] "ADC_._Immunotherapy"
## [9] "ADC_Therapy"
## [10] "Adjuvant_Radiation_Therapy"
## [11] "Adjuvant_Therapy"
## [12] "Age_at_Diagnosis"
## [13] "ECOG"
## [14] "ADC_Therapy_Best_Response"
## [15] "ADC_._Immunotherapy_Best_Response"
## [16] "Chemotherapy_Best_Response"
## [17] "Chemotherapy_._Immunotherapy_Best_Response"
## [18] "Immunotherapy_Best_Response"
## [19] "Targeted_Therapy_Best_Response"
## [20] "Cancer_Type"
## [21] "Cancer_Type_Detailed"
## [22] "Chemotherapy_"
## [23] "Chemotherapy_._Immunotherapy"
## [24] "Survival_with_Chemotherapy"
## [25] "Concurrent_Chemoradiation"
## [26] "Reason_for_Death"
## [27] "First_Treatment"
## [28] "X_Immunotherapy"
## [29] "Survival_with_Immunotherapy"
## [30] "Metastatic_Radiation_Therapy"
## [31] "Mutation_Count"
## [32] "Therapy_prior_to_NGS"
## [33] "Oncotree_Code"
## [34] "Sample_Type"
## [35] "Primary_Radiation_Therapy"
## [36] "Primary_Surgery"
## [37] "Histology"
## [38] "M_Stage"
## [39] "Variant_Histology"
## [40] "Primary_Tumor_Location"
## [41] "Race"
## [42] "Number_of_Samples_Per_Patient"
## [43] "Sample_Site"
## [44] "Sex"
## [45] "Smoking_Status"
## [46] "Surgery_for_Advanced_Disease"
## [47] "Survival_Status"
## [48] "Survival_Time"
## [49] "Metastatic_Systemic_Therapy_"
## [50] "Targeted_Therapy"
## [51] "OS.ID"
## [52] "OS.time"
# Show the leading rows of the staging, demographic and survival columns
meta[, c(
  "Sample_ID", "T_stage", "N_Stage", "M_Stage",
  "Age_at_Diagnosis", "Sex", "Survival_Status", "Survival_Time"
)] |>
  head()
## Sample_ID T_stage N_Stage M_Stage Age_at_Diagnosis Sex
## 5037-TUMOR 5037-TUMOR T4a N3 MX 60 Male
## 5116-TUMOR 5116-TUMOR T4a N2 MX 71 Male
## 5123-TUMOR 5123-TUMOR T3a N1 MX 73 Female
## 5172-TUMOR 5172-TUMOR T3 N3 M1 74 Male
## 5446-TUMOR 5446-TUMOR T3 N2 MX 76 Female
## 6026-TUMOR 6026-TUMOR T4a N2 MX 67 Male
## Survival_Status Survival_Time
## 5037-TUMOR Dead 33.25
## 5116-TUMOR Unknown NA
## 5123-TUMOR Alive 53.06
## 5172-TUMOR Dead 20.37
## 5446-TUMOR Alive 25.00
## 6026-TUMOR Dead 12.42
# Colour palette for M stage categories
mstage_colors <- c(
  "M0" = "#4E79A7",
  "M1" = "#E15759",
  "MX" = "#EDC948",
  "unknown" = "#999999",
  "NA" = "#CCCCCC"
)

# Normalise raw M stage values into clean categories.
#
# x: vector of raw M stage labels (coerced to character).
# Returns a character vector the same length as `x` containing one of
# "M0", "M1", "MX", "unknown" or the literal string "NA".
normalize_mstage <- function(x) {
  # Compare case-insensitively and ignore surrounding whitespace
  x_clean <- trimws(tolower(as.character(x)))
  case_when(
    x_clean == "m0" ~ "M0",
    x_clean == "m1" ~ "M1",
    x_clean == "mx" ~ "MX",
    x_clean == "unknown" ~ "unknown",
    # Missing or blank entries get the string "NA" so they keep a pie slice
    is.na(x) | x_clean == "" ~ "NA",
    # Any other label is lumped into "unknown"
    TRUE ~ "unknown"
  )
}

# Count patients per normalised M stage category
mstage_data <- meta |>
  mutate(MStage = normalize_mstage(M_Stage)) |>
  count(MStage)

# Pie chart: a single stacked bar wrapped into polar coordinates
ggplot(mstage_data, aes(x = "", y = n, fill = MStage)) +
  geom_col(width = 1, color = "white", linewidth = 0.5) +
  # x = 1.65 puts the count labels just beyond the outer edge of the bar
  geom_text(aes(x = 1.65, label = n),
            position = position_stack(vjust = 0.5), size = 4) +
  coord_polar("y") +
  scale_fill_manual(values = mstage_colors) +
  labs(title = "Clinical M Stage") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 13),
    legend.position = "right"
  )

# Figure: Distribution of M stage across BLCA_BCAN_HCRN patients
# Colour palette for T stage categories
tstage_colors <- c(
  "Ta" = "#A0CBE8", "T1" = "#4E79A7", "T2" = "#F28E2B",
  "T3" = "#E15759", "T4" = "#B07AA1"
)

# Collapse raw T stage labels to their main category by prefix, so that
# sub-stages such as "T4a" or "T3a" map to "T4" and "T3".
#
# x: vector of raw T stage labels (coerced to character).
# Returns a character vector the same length as `x` containing one of
# "Ta", "T1", "T2", "T3", "T4", or NA when no prefix matches.
normalize_tstage <- function(x) {
  # Trim surrounding whitespace first, as the M and N stage normalisers do,
  # so a padded label is not silently turned into NA
  x_clean <- trimws(as.character(x))
  case_when(
    grepl("^ta", x_clean, ignore.case = TRUE) ~ "Ta",
    grepl("^t1", x_clean, ignore.case = TRUE) ~ "T1",
    grepl("^t2", x_clean, ignore.case = TRUE) ~ "T2",
    grepl("^t3", x_clean, ignore.case = TRUE) ~ "T3",
    grepl("^t4", x_clean, ignore.case = TRUE) ~ "T4",
    TRUE ~ NA_character_
  )
}

# Count patients per T stage; labels that match no category are excluded
stage_data <- meta |>
  mutate(Stage = normalize_tstage(T_stage)) |>
  filter(!is.na(Stage)) |>
  count(Stage)

# Pie chart: a single stacked bar wrapped into polar coordinates
ggplot(stage_data, aes(x = "", y = n, fill = Stage)) +
  geom_col(width = 1, color = "white", linewidth = 0.5) +
  # x = 1.65 puts the count labels just beyond the outer edge of the bar
  geom_text(aes(x = 1.65, label = n),
            position = position_stack(vjust = 0.5), size = 4) +
  coord_polar("y") +
  scale_fill_manual(values = tstage_colors) +
  labs(title = "Clinical T Stage") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 13),
    legend.position = "right"
  )

# Figure: Distribution of T stage across BLCA_BCAN_HCRN patients. The
# BLCA_BCAN_HCRN cohort consists entirely of muscle-invasive patients, so only
# T3 and T4 stages are represented.
# Colour palette for N stage categories
nstage_colors <- c(
  "N+" = "#76B7B2", "N0" = "#4E79A7", "N1" = "#F28E2B",
  "N2" = "#E15759", "N3" = "#B07AA1", "Nx" = "#EDC948",
  "unknown" = "#999999", "NA" = "#CCCCCC"
)

# Normalise raw N stage values into clean categories.
#
# x: vector of raw N stage labels (coerced to character).
# Returns a character vector the same length as `x` containing one of
# "N+", "N0", "N1", "N2", "N3", "Nx", "unknown" or the literal string "NA".
normalize_nstage <- function(x) {
  # Compare case-insensitively and ignore surrounding whitespace
  x_clean <- trimws(tolower(as.character(x)))
  case_when(
    x_clean == "n+" ~ "N+",
    x_clean == "n0" ~ "N0",
    x_clean == "n1" ~ "N1",
    x_clean == "n2" ~ "N2",
    x_clean == "n3" ~ "N3",
    x_clean == "nx" ~ "Nx",
    x_clean == "unknown" ~ "unknown",
    # Missing or blank entries get the string "NA" so they keep a pie slice
    is.na(x) | x_clean == "" ~ "NA",
    # Any other label is lumped into "unknown"
    TRUE ~ "unknown"
  )
}

# Count patients per normalised N stage category
nstage_data <- meta |>
  mutate(NStage = normalize_nstage(N_Stage)) |>
  count(NStage)

# Pie chart: a single stacked bar wrapped into polar coordinates
ggplot(nstage_data, aes(x = "", y = n, fill = NStage)) +
  geom_col(width = 1, color = "white", linewidth = 0.5) +
  # x = 1.65 puts the count labels just beyond the outer edge of the bar
  geom_text(aes(x = 1.65, label = n),
            position = position_stack(vjust = 0.5), size = 4) +
  coord_polar("y") +
  scale_fill_manual(values = nstage_colors) +
  labs(title = "Clinical N Stage") +
  theme_void() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 13),
    legend.position = "right"
  )

library(survival)
library(survminer)
# Build the survival analysis table: binary nodal status, follow-up time and
# an event indicator (1 = dead, 0 = alive).
surv_data <- meta |>
  mutate(
    # N0 is node negative; N1, N2, N3 and N+ are node positive; anything
    # else becomes NA
    Nodal = case_when(
      grepl("^n0", N_Stage, ignore.case = TRUE) ~ "Node Negative",
      grepl("^n1|^n2|^n3|^n\\+", N_Stage, ignore.case = TRUE) ~ "Node Positive",
      TRUE ~ NA_character_
    ),
    time = as.numeric(Survival_Time),
    # Any status other than dead/alive yields NA
    status = case_when(
      tolower(Survival_Status) == "alive" ~ 0,
      tolower(Survival_Status) == "dead" ~ 1,
      TRUE ~ NA_real_
    )
  ) |>
  # Keep only rows with all three fields present and a positive follow-up time
  filter(!(is.na(Nodal) | is.na(time) | is.na(status)), time > 0)

# Kaplan-Meier estimate stratified by nodal status
fit <- survfit(formula = Surv(time, status) ~ Nodal, data = surv_data)

# Survival curves with p-value annotation and a risk table underneath
ggsurvplot(
  fit,
  data = surv_data,
  title = "Overall Survival by Nodal Status",
  xlab = "Time (months)",
  ylab = "Overall Survival Probability",
  legend.title = "Nodal Status",
  palette = c("#4E79A7", "#E15759"),
  pval = TRUE,
  conf.int = FALSE,
  risk.table = TRUE,
  risk.table.height = 0.25,
  ggtheme = theme_minimal(base_size = 12)
)
## Session Info
``` r
sessionInfo()
## R version 4.4.3 (2025-02-28)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.5
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] survminer_0.5.1 ggpubr_0.6.1
## [3] survival_3.8-3 dplyr_1.1.4
## [5] ggplot2_3.5.2 SummarizedExperiment_1.36.0
## [7] Biobase_2.66.0 GenomicRanges_1.58.0
## [9] GenomeInfoDb_1.42.3 IRanges_2.40.1
## [11] S4Vectors_0.44.0 BiocGenerics_0.52.0
## [13] MatrixGenerics_1.18.1 matrixStats_1.5.0
## [15] BEACON_0.0.0.9000
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 farver_2.1.2 fastmap_1.2.0
## [4] digest_0.6.39 lifecycle_1.0.5 magrittr_2.0.4
## [7] compiler_4.4.3 rlang_1.1.7 sass_0.4.10
## [10] tools_4.4.3 yaml_2.3.10 data.table_1.17.2
## [13] knitr_1.50 ggsignif_0.6.4 S4Arrays_1.6.0
## [16] labeling_0.4.3 bit_4.6.0 DelayedArray_0.32.0
## [19] xml2_1.4.1 RColorBrewer_1.1-3 abind_1.4-8
## [22] withr_3.0.2 purrr_1.0.4 grid_4.4.3
## [25] xtable_1.8-4 scales_1.4.0 cli_3.6.5
## [28] rmarkdown_2.29 crayon_1.5.3 generics_0.1.4
## [31] rstudioapi_0.17.1 km.ci_0.5-6 httr_1.4.7
## [34] tzdb_0.5.0 commonmark_2.0.0 cachem_1.1.0
## [37] stringr_1.5.1 zlibbioc_1.52.0 splines_4.4.3
## [40] parallel_4.4.3 XVector_0.46.0 survMisc_0.5.6
## [43] vctrs_0.7.1 Matrix_1.7-2 jsonlite_2.0.0
## [46] carData_3.0-5 litedown_0.9 car_3.1-3
## [49] bit64_4.6.0-1 rstatix_0.7.2 Formula_1.2-5
## [52] jquerylib_0.1.4 tidyr_1.3.1 glue_1.8.0
## [55] ggtext_0.1.2 stringi_1.8.7 gtable_0.3.6
## [58] UCSC.utils_1.2.0 tibble_3.3.1 pillar_1.11.1
## [61] rappdirs_0.3.4 htmltools_0.5.8.1 GenomeInfoDbData_1.2.13
## [64] R6_2.6.1 KMsurv_0.1-6 vroom_1.7.0
## [67] evaluate_1.0.3 lattice_0.22-6 markdown_2.0
## [70] backports_1.5.0 gridtext_0.1.5 broom_1.0.10
## [73] bslib_0.9.0 Rcpp_1.0.14 gridExtra_2.3
## [76] SparseArray_1.6.2 xfun_0.55 zoo_1.8-15
## [79] pkgconfig_2.0.3