# Yules PrCa Public Localized

# Build an API client from the cBioPortal API description.
# NOTE(review): `client` is not referenced again in the visible script.
library(rapiclient)
client <- get_api(url = "https://www.cbioportal.org/api/v2/api-docs")

# cbioportalR is loaded for the study/sample/mutation helpers called below.
# NOTE(review): presumably this selects the public cBioPortal instance for
# the session; confirm against the cbioportalR documentation.
library(cbioportalR)
set_cbioportal_db("public")

# Data wrangling, spreadsheet reading and correlation-plot packages.
# NOTE(review): dplyr and ggplot2 are already attached by tidyverse, and no
# readxl function is called in the visible script.
library(tidyverse)
library(dplyr)
library(ggplot2)
library(readxl)
library(corrplot)

#' Pairwise Yule association coefficient between columns of binary matrices
#'
#' This is the coefficient used for all subsequent calculations. For every
#' column of `dm.x` paired with every column of `dm.y`, the four cells of the
#' 2x2 contingency table are counted across rows and combined as
#' (tt * ff - tf * ft) / (tt * ff + tf * ft).
#'
#' NOTE(review): this ratio is the form usually called Yule's Q; Yule's Y
#' takes square roots of the two products first. The formula is kept as
#' written because downstream results depend on it.
#'
#' @param dm.x Binary matrix (0/1 or FALSE/TRUE); rows are observations.
#' @param dm.y Binary matrix with the same number of rows as `dm.x`.
#' @return Numeric matrix with one row per column of `dm.x` and one column
#'   per column of `dm.y`. An entry is NaN when its denominator is zero.
calcYulesYBetweenMatrices1 <- function(dm.x, dm.y) {
  # `||` is the scalar, short-circuiting operator meant for `if` conditions.
  if (!all(dm.x %in% c(0, 1)) || !all(dm.y %in% c(0, 1))) {
    stop("Error: calcYulesYBetweenMatrices() requires binary matrices as input. Please ensure all values are 0/1 or FALSE/TRUE.")
  }

  # crossprod(a, b) is t(a) %*% b without materialising the transpose.
  tt <- crossprod(dm.x, dm.y)   # Count  TRUE and  TRUE
  tf <- crossprod(dm.x, !dm.y)  # Count  TRUE and FALSE
  ft <- crossprod(!dm.x, dm.y)  # Count FALSE and  TRUE
  ff <- crossprod(!dm.x, !dm.y) # Count FALSE and FALSE

  (tt * ff - tf * ft) / (tt * ff + tf * ft)
}

# Study catalogue ----
# Every study the configured database exposes.
# NOTE(review): `all_studies` is not referenced again in the visible script.
all_studies <- available_studies()

# Per-study sample tables ----
# For each of the three prostate studies, list its samples and keep only the
# sample, patient and study identifier columns.
cpcg <- select(
  available_samples("prad_cpcg_2017"),
  sampleId, patientId, studyId
)

p1000 <- select(
  available_samples("prad_p1000"),
  sampleId, patientId, studyId
)

tcga <- select(
  available_samples("prad_tcga_pan_can_atlas_2018"),
  sampleId, patientId, studyId
)

# Primary, non-TCGA records of prad_p1000 ----
# Records of prad_p1000 whose DATA_SOURCE clinical attribute is "TCGA".
p1000_tcga <- get_clinical_by_study(
  study_id = "prad_p1000",
  clinical_attribute = "DATA_SOURCE",
  base_url = "www.cbioportal.org/api"
) %>%
  filter(value == "TCGA")

# Records whose SAMPLE_TYPE is "Primary" and whose patient is not part of the
# TCGA subset above. `%in%` tests set membership; the earlier `!=` compared
# the two patient columns position by position (with recycling), so TCGA
# patients were not reliably excluded.
# NOTE(review): `p1000_primary` is not referenced later in the visible
# script, so this selection currently has no effect on the analysis.
p1000_primary <- get_clinical_by_study(
  study_id = "prad_p1000",
  clinical_attribute = "SAMPLE_TYPE",
  base_url = "www.cbioportal.org/api"
) %>%
  filter(
    value == "Primary",
    !(patientId %in% p1000_tcga$patientId)
  )

# Sample/study pairs ----
# Stack the three per-study sample tables and keep the two columns that are
# handed to the mutation query.
# NOTE(review): every prad_p1000 sample is included here; the
# `p1000_primary` subset built earlier is not applied. Confirm the intent.
df_pairs <- select(bind_rows(cpcg, p1000, tcga), sampleId, studyId)

# Mutation calls ----
# Query the genomic data for those pairs, take its "mutation" element as a
# data frame, and keep gene symbol and sample id under the names used by the
# rest of the script (`Gene_name`, `ID`).
prca_public <-
  get_genetics_by_sample(sample_study_pairs = df_pairs)[["mutation"]] %>%
  as.data.frame() %>%
  select(Gene_name = hugoGeneSymbol, ID = sampleId)

# Genes of interest ----
# The 15 genes with the most mutation records in the public data.
top_prca <- prca_public %>%
  count(Gene_name, sort = TRUE) %>%
  head(15)

# Add CDH1 to that list and store it as a one-column data frame.
# unique() prevents a duplicate entry (and thus duplicated rows/columns
# downstream) if CDH1 is already among the top 15. Building the column
# explicitly as character avoids relying on the pipe placeholder name "."
# and on the session's string-to-factor default.
top_prca <- data.frame(
  Gene_name = unique(c(top_prca$Gene_name, "CDH1")),
  stringsAsFactors = FALSE
)

# Binary sample-by-gene mutation matrix ----
# One row per sample (in order of first appearance), one column per gene;
# 1 if the sample has at least one mutation record for the gene, else 0.
#
# distinct() collapses multiple mutations of the same gene in the same
# sample up front, so pivot_wider() never has to build list-columns, and
# values_fill = 0 writes the zeros directly. The earlier version recovered
# 0/1 by stringifying list-columns ("NULL", "c(1, 1)") and patching the
# resulting NAs to 1, which produced an all-ones matrix whenever no
# sample/gene pair happened to be duplicated.
prca_public_matrix <- prca_public %>%
  distinct(ID, Gene_name) %>%
  mutate(seen = 1) %>%
  pivot_wider(
    names_from = Gene_name,
    values_from = seen,
    values_fill = 0
  ) %>%
  select(-ID) %>%
  as.matrix()

# Gene-by-gene coefficient table ----
# Coefficient of every gene column against every other gene column of the
# binary mutation matrix, held as a data frame.
yule.prca_public <- prca_public_matrix %>%
  calcYulesYBetweenMatrices1(prca_public_matrix) %>%
  as.data.frame()

# Restrict rows and columns to the genes listed in `top_prca`, round the
# coefficients to 2 digits and convert back to a plain matrix.
yule.prca_public.filtered <- yule.prca_public[top_prca$Gene_name, ] %>%
  select(top_prca$Gene_name) %>%
  round(digits = 2) %>%
  as.matrix()

# Put rows and columns into the same alphabetical gene order.
order_indices <- order(rownames(yule.prca_public.filtered))
yule.prca_public.filtered <-
  yule.prca_public.filtered[order_indices, order_indices]

# All of PUBLIC datasets ----
# Pairwise p-values for the plotted genes, from Fisher's exact test on the
# 2x2 mutated / not-mutated table of each gene pair across samples.
#
# cor.mtest() expects a samples-by-features data matrix. Passing it the
# coefficient matrix tested correlations between coefficient columns, not
# co-mutation between genes, so those p-values did not match the values
# being plotted. The result keeps the list-with-`$p` shape used below; the
# diagonal is 0, as in cor.mtest() output.
# NOTE(review): p-values are not adjusted for multiple testing (neither were
# the previous ones) — confirm whether an adjustment is wanted.
p_prca_public <- local({
  genes <- colnames(yule.prca_public.filtered)
  calls <- prca_public_matrix[, genes, drop = FALSE]
  p <- matrix(
    0,
    nrow = length(genes),
    ncol = length(genes),
    dimnames = list(genes, genes)
  )
  for (i in seq_along(genes)) {
    for (j in seq_len(i - 1L)) {
      # Fixed factor levels keep the table 2x2 even if a cell is empty.
      counts <- table(
        factor(calls[, i], levels = c(0, 1)),
        factor(calls[, j], levels = c(0, 1))
      )
      p[i, j] <- p[j, i] <- fisher.test(counts)$p.value
    }
  }
  list(p = p)
})

# Visualize all relationships (statistically significant and
# non-statistically significant)
corrplot.mixed(
  yule.prca_public.filtered,
  lower = "ellipse",
  upper = "number",
  number.cex = .6,
  tl.pos = "lt"
)

# Visualize only the statistically significant comutations: cells whose
# p-value exceeds sig.level are left blank.
corrplot.mixed(
  yule.prca_public.filtered,
  lower = "ellipse",
  upper = "number",
  number.cex = .6,
  tl.pos = "lt",
  p.mat = p_prca_public$p,
  sig.level = 0.05,
  insig = "blank"
)

# Yules PrCa Public Localized

# VN

# 2023-12-23