library(cbioportalR)
set_cbioportal_db("public")
library(tidyverse)
library(dplyr)
library(ggplot2)
library(readxl)
library(corrplot)
#' Pairwise Yule association between the columns of two binary matrices
#'
#' Every column of `dm.x` is paired with every column of `dm.y`. For each pair
#' the four cells of the 2x2 contingency table are counted across rows and
#' combined as `(tt * ff - tf * ft) / (tt * ff + tf * ft)`.
#'
#' NOTE(review): this closed form is the one usually published as Yule's Q.
#' Yule's Y (coefficient of colligation) takes the square root of both
#' products first. The formula is kept as-is so downstream results do not
#' change; confirm which coefficient is intended.
#'
#' @param dm.x Binary matrix (0/1 or FALSE/TRUE), one row per observation.
#' @param dm.y Binary matrix (0/1 or FALSE/TRUE) conformable with `dm.x`
#'   (same observations in the same row order).
#' @return Numeric matrix with one row per column of `dm.x` and one column per
#'   column of `dm.y`. A cell is `NaN` when both products are zero (0 / 0).
calcYulesYBetweenMatrices1 <- function(dm.x, dm.y) {
  # Scalar condition: `||` short-circuits, so dm.y is only scanned when dm.x
  # already passed. NA values fail the %in% test and are rejected as well.
  if (!all(dm.x %in% c(0, 1)) || !all(dm.y %in% c(0, 1))) {
    stop("Error: calcYulesYBetweenMatrices() requires binary matrices as input. Please ensure all values are 0/1 or FALSE/TRUE.")
  }

  # `!m` flips a 0/1 matrix into its logical complement; %*% then counts the
  # rows in which both (possibly negated) columns are TRUE.
  tt <- t(dm.x) %*% dm.y    # TRUE  and TRUE
  tf <- t(dm.x) %*% !dm.y   # TRUE  and FALSE
  ft <- t(!dm.x) %*% dm.y   # FALSE and TRUE
  ff <- t(!dm.x) %*% !dm.y  # FALSE and FALSE

  concordant <- tt * ff
  discordant <- tf * ft
  (concordant - discordant) / (concordant + discordant)
}
# Studies offered by the cBioPortal database selected at the top of the script
all_studies <- available_studies()

# Sample, patient and study identifiers of the prad_su2c_2019 study
su2c_2019 <- select(
  available_samples("prad_su2c_2019"),
  sampleId, patientId, studyId
)

# Fetch the genomic data of those samples from the same study
prca_public_metastasized <- get_genetics_by_sample(
  study_id = "prad_su2c_2019",
  sample_id = su2c_2019$sampleId
)

# Keep only the "mutation" element as a data frame with two columns:
# the gene symbol (Gene_name) and the sample identifier (ID)
prca_public_metastasized <- prca_public_metastasized[["mutation"]] %>%
  as.data.frame() %>%
  select(Gene_name = hugoGeneSymbol, ID = sampleId)
# Rank genes by their number of mutation records and keep the 15 most frequent
top_prca_metastasized <- prca_public_metastasized %>%
  count(Gene_name, sort = TRUE) %>%
  head(15)

# Always include CDH1 in the genes of interest. union() leaves the list
# unchanged when CDH1 is already among the top genes, so the gene list never
# contains duplicates (duplicates would make the filtered coefficient matrix
# non-square). The result is a one-column data frame named Gene_name.
top_prca_metastasized <- data.frame(
  Gene_name = union(top_prca_metastasized$Gene_name, "CDH1")
)
# Build the binary sample-by-gene matrix: one row per sample, one column per
# gene, 1 when the sample has at least one mutation record in that gene and
# 0 otherwise.
#
# A sample can carry several mutations of the same gene. Collapsing to
# distinct (ID, Gene_name) pairs first guarantees that every cell holds a
# single value, and values_fill = 0 writes the explicit 0 for sample/gene
# pairs that never occur. The result is therefore correct whether or not the
# data contain repeated pairs, without list-columns or NA back-filling.
prca_public_metastasized_matrix <- prca_public_metastasized %>%
  distinct(ID, Gene_name) %>%
  mutate(seen = 1) %>%
  pivot_wider(
    names_from = Gene_name,
    values_from = seen,
    values_fill = 0
  ) %>%
  select(-ID) %>%
  as.matrix()
# Yule coefficient of every gene against every other gene (columns of the
# binary matrix compared with themselves), stored as a data frame
yule.prca_public_metastasized <- as.data.frame(
  calcYulesYBetweenMatrices1(
    prca_public_metastasized_matrix,
    prca_public_metastasized_matrix
  )
)

# Restrict rows and columns to the genes of interest (top genes + CDH1),
# round the coefficients to 2 digits and convert back to a matrix.
# all_of() makes the use of an external character vector explicit and errors
# clearly if a requested gene is missing from the columns.
yule.prca_public_metastasized.filtered <-
  yule.prca_public_metastasized[top_prca_metastasized$Gene_name, ] %>%
  select(all_of(top_prca_metastasized$Gene_name)) %>%
  round(digits = 2) %>%
  as.matrix()

# Sort genes alphabetically, applying the same permutation to rows and
# columns; drop = FALSE keeps a matrix even for a single gene
order_indices_metastasized <- order(rownames(yule.prca_public_metastasized.filtered))
yule.prca_public_metastasized.filtered <- yule.prca_public_metastasized.filtered[
  order_indices_metastasized,
  order_indices_metastasized,
  drop = FALSE
]
# All of PUBLIC datasets
# P-values for the co-mutation of every plotted gene pair.
#
# The tests are run on the binary sample-by-gene matrix (the observations),
# not on the coefficient matrix: cor.mtest() treats its input as observation
# data, so feeding it the rounded coefficients would test the correlation
# between coefficient profiles instead of the association between two genes.
# Each pair gets a two-sided Fisher exact test on its 2x2 mutated /
# not-mutated table. No multiple-testing correction is applied.
#
# p_prca_public keeps the list shape read by the plotting code (element `p`,
# a gene-by-gene matrix in the same order as the plotted coefficients, with a
# zero diagonal). Confidence-interval elements are not produced.
genes_plotted <- rownames(yule.prca_public_metastasized.filtered)
binary_plotted <- prca_public_metastasized_matrix[, genes_plotted, drop = FALSE]

p_values_plotted <- matrix(
  0,
  nrow = length(genes_plotted),
  ncol = length(genes_plotted),
  dimnames = list(genes_plotted, genes_plotted)
)
for (i in seq_along(genes_plotted)) {
  for (j in seq_len(i - 1)) {
    # Fixed factor levels keep the table 2x2 even if a gene is constant
    contingency <- table(
      factor(binary_plotted[, i], levels = c(0, 1)),
      factor(binary_plotted[, j], levels = c(0, 1))
    )
    pair_p <- fisher.test(contingency)$p.value
    p_values_plotted[i, j] <- pair_p
    p_values_plotted[j, i] <- pair_p
  }
}
p_prca_public <- list(p = p_values_plotted)
# Plot 1: every gene pair, whether or not it is statistically significant.
# Coefficients are printed in the upper triangle and drawn as ellipses in the
# lower triangle; labels go on the left and top.
corrplot.mixed(
  yule.prca_public_metastasized.filtered,
  upper = "number",
  lower = "ellipse",
  tl.pos = "lt",
  number.cex = 0.6
)

# Plot 2: same layout, but cells whose p-value is above 0.05 are left blank
corrplot.mixed(
  yule.prca_public_metastasized.filtered,
  upper = "number",
  lower = "ellipse",
  tl.pos = "lt",
  number.cex = 0.6,
  p.mat = p_prca_public$p,
  sig.level = 0.05,
  insig = "blank"
)
