This file contains all four datasets: Breast Cancer MIB (BCmib), Breast Cancer RNASeq (BCrna), Cardiotox MIB (CTmib), and Cardiotox RNASeq (CTrna). The goal is to compare trametinib at 120 hrs across all assays.

Load libraries

Cardiotoxicity RNAseq data (CTrna)

# Read the cardiotoxicity RNA-seq results for trametinib.
CTrna <- read.csv("tramet.csv")

# Keep only rows with p < 0.05 whose Mean.1 value is not zero.
CTrna <- CTrna %>%
  filter(pval < 0.05, Mean.1 != 0)

# Drop everything except columns 1, 12 and 13 (positions in tramet.csv).
CTrna <- CTrna[, c(1, 12, 13)]

# Upper-case the gene names so they match the breast cancer data, then give
# the three remaining columns their working names.
CTrna$Gene <- toupper(CTrna$Gene)
names(CTrna) <- c("Gene", "log2", "pval")

Cardiotoxicity MIB data (CTmib)

# Cardiotoxicity MIB table. Per the original author's note, every value is
# significant for at least one replicate.
CTmib <- read_excel("MIB.xlsx")

# Keep columns 2, 3 and 39 (positions in MIB.xlsx) under working names.
CTmib <- CTmib[, c(2, 3, 39)]
names(CTmib) <- c("Gene0", "Gene", "log2_CT")

Breast Cancer RNAseq data

# Read the breast cancer DESeq results.
BCrna <- read.csv("BreastCancer_DESeq_Results.csv")

# Keep columns 1, 3 and 6 (positions in the CSV) under working names.
# NOTE(review): no rounding and no p < 0.05 filter is applied here, although
# CTrna is filtered at p < 0.05 -- confirm that this difference is intended.
BCrna <- BCrna[, c(1, 3, 6)]
names(BCrna) <- c("Gene", "log2", "pval")

Breast Cancer MIB data (BCmib)

# Breast cancer MIB data, read from the "LFQ normalized from Kinases tab"
# sheet of the trametinib time-course workbook.
BCmib <- read_excel(
  "Copy of MCF10A_TimeCourse_Trametinib_proteinGroups_AW.xlsx",
  sheet = "LFQ normalized from Kinases tab"
)

# Keep the gene column (1) and the three replicate columns (11-13).
BCmib <- BCmib[, c(1, 11:13)]

# Drop rows whose replicate values sum to zero once NAs are ignored. Rows in
# which all three replicates are NA are removed as well, because
# rowSums(..., na.rm = TRUE) is 0 for them. This single filter keeps exactly
# the rows the previous two-step filter kept: the earlier "any non-NA value"
# mask was computed over every column of the sheet and was fully implied by
# this one.
BCmib <- BCmib[rowSums(BCmib[-1], na.rm = TRUE) != 0, ]

# Take the mean of the replicates and apply a log2 transformation.
BCmib$mean <- rowMeans(BCmib[, 2:4], na.rm = TRUE)
BCmib$log2 <- log2(BCmib$mean)

# Trim the data frame down to the gene column and the log2 column (6th).
BCmib <- BCmib[, c(1, 6)]
colnames(BCmib) <- c("Gene", "log2_BC")

Next: find the overlapping genes in the RNAseq data. `dfr` is the data frame that holds the combined RNA data.

# Join the two RNA-seq tables on gene name; merge() keeps only genes that
# appear in both tables.
dfr <- merge(BCrna, CTrna, by = "Gene", suffixes = c("_BCrna", "_CTrna"))
colnames(dfr) <- c("Gene", "BC_log2", "BC_pval", "CT_log2", "CT_pval")

# Convert the value columns to numeric one column at a time. apply() on a
# data frame goes through as.matrix(), which format()s numeric columns to
# text (about 7 significant digits) whenever a character column is present,
# so values that were already numeric would silently lose precision.
dfr[2:5] <- lapply(dfr[2:5], as.numeric)
# as.numeric() still warns "NAs introduced by coercion" for entries that are
# not valid numbers; those entries become NA.

Any relationships?

# Scatter plot of the breast cancer vs cardiotox log2 values for the genes in
# dfr. gghisto is defined outside this excerpt.
ggplot(data = dfr, mapping = aes(x = BC_log2, y = CT_log2)) +
  geom_point(
    shape = 21,
    size = 4,
    alpha = 0.2,
    colour = "violetred4",
    fill = "violetred1"
  ) +
  xlab("BC_log2") +
  ylab("CT_log2") +
  ggtitle("Scatter Plot of Breast Cancer and CardioTox RNASeq Log2 fold change") +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  )
## Warning: Removed 3 rows containing missing values (`geom_point()`).

Volcano plots

# Volcano plot for the breast cancer RNA-seq table: log2 on the x axis,
# -log10(p-value) on the y axis.
p1 <- ggplot(data = BCrna, mapping = aes(x = log2, y = -log10(pval))) +
  geom_point(
    shape = 21,
    size = 2,
    alpha = 0.3,
    colour = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    title = "Volcano Plot \nfor Breast Cancer RNAseq",
    x = "BC_log2",
    y = "-log10(BC_pval)"
  ) +
  gghisto

# Coerce the cardiotox p-values and log2 values to numeric, then drop every
# row that has an NA in any column.
CTrna <- CTrna %>%
  mutate(
    pval = as.numeric(pval),
    log2 = as.numeric(log2)
  ) %>%
  drop_na()
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `log2 = as.numeric(log2)`.
## Caused by warning:
## ! NAs introduced by coercion
# Volcano plot for the cardiotox RNA-seq table. The x axis is limited to
# [-5, 10]; points outside that range are removed from the plot with a
# warning.
p2 <- ggplot(data = CTrna, mapping = aes(x = log2, y = -log10(pval))) +
  geom_point(
    shape = 21,
    size = 2,
    alpha = 0.3,
    colour = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    title = "Volcano Plot \nfor CardioTox RNAseq",
    x = "CT_log2",
    y = "-log10(CT_pval)"
  ) +
  gghisto +
  scale_x_continuous(limits = c(-5, 10))
# TODO (from the original note): add the top 20 expressed genes, measured by
# the highest absolute log2 fold change.
# Show both volcano plots side by side.
grid.arrange(p1, p2, ncol = 2)
## Warning: Removed 1 rows containing missing values (`geom_point()`).

NOW bring it down to top 100 genes from each set

# Keep the 100 rows with the highest log2 values from each RNA-seq table.
# The ranking uses the signed log2 value (not its absolute value), and
# top_n() returns more than 100 rows when there are ties at the cutoff.
top_100ct <- top_n(CTrna, n = 100, wt = log2)

top_100bc <- top_n(BCrna, n = 100, wt = log2)

Venn Diagram showing differences

library(ggvenn)
## Loading required package: grid
library(VennDiagram)
## Loading required package: futile.logger
## 
## Attaching package: 'VennDiagram'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
library(RColorBrewer)

# Gene sets compared in both Venn diagrams below: the top-100 genes of each
# RNA-seq table.
x <- list(
  `Breast \nCancer` = top_100bc$Gene,
  `Cardio\ntoxicity` = top_100ct$Gene
)

# Counts-only diagram; filename = NULL returns the grid object instead of
# writing an image file.
v <- venn.diagram(
  x,
  filename = NULL,
  fill = c("deeppink", "lightblue"),
  alpha = c(0.5, 0.5),
  cex = 2,
  cat.cex = 2
)

# Draw the default plot on a fresh page.
grid.newpage()
grid.draw(v)

# Same sets, with the gene names printed inside the regions.
ggvenn(
  data = x,
  fill_color = c("deeppink", "skyblue1"),
  show_elements = TRUE,
  label_sep = "\n",
  text_size = 1
)

Build a Venn-diagram-style membership table for the top-100 gene sets

# One row per gene that appears in either top-100 set, with logical flags
# for membership in the cardiotox set, the breast cancer set, and both.
gene_status <- data.frame(
  Gene = unique(c(top_100bc$Gene, top_100ct$Gene))
) %>%
  mutate(
    cardiotox = Gene %in% top_100ct$Gene,
    breastcancer = Gene %in% top_100bc$Gene,
    both = cardiotox & breastcancer
  )

#gene_status
# Subset the rows with the top 60 highest log2 values. Per the original note,
# there is no overlap in the top 60 values, and plotting 100 genes from each
# set is too much on one graph.
top_60ct <- CTrna %>%
  top_n(n = 60, wt = log2)

top_60bc <- BCrna %>%
  top_n(n = 60, wt = log2)


library(ggdist)

# Membership table for the top-60 sets. The plot used to draw gene_status,
# which is built from the top-100 sets, so its "Top 60" title and axis labels
# did not describe the data shown (and top_60ct / top_60bc were never used).
top_60_status <- data.frame(Gene = unique(c(top_60bc$Gene, top_60ct$Gene)))
top_60_status$cardiotox <- top_60_status$Gene %in% top_60ct$Gene
top_60_status$breastcancer <- top_60_status$Gene %in% top_60bc$Gene

# Jitter the points inside each TRUE/FALSE cell so the gene labels spread out.
ggplot(top_60_status, aes(x = factor(breastcancer), y = factor(cardiotox))) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0.3),
    size = 4,
    fill = "violetred1",
    color = "violetred4",
    alpha = 0.5,
    shape = 21
  ) +
  labs(
    x = "in_top_60bc",
    y = "in_top_60ct",
    title = "Counts of Genes in Top 60 BC and CT"
  ) +
  scale_x_discrete(labels = c("FALSE", "TRUE")) +
  scale_y_discrete(labels = c("FALSE", "TRUE")) +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  ) +
  geom_label_repel(
    aes(label = Gene),
    box.padding = 0.25,
    point.padding = 0.1,
    segment.color = "white",
    max.overlaps = 200,
    size = 2
  )


Onto MIB data!

MIB venn diagram

# Gene sets for both MIB Venn diagrams, with missing gene names removed.
# Previously only venn.diagram() received NA-free vectors while ggvenn() got
# the raw columns, so the two diagrams could disagree whenever a Gene entry
# was NA.
x <- list(
  `Breast \nCancer` = na.omit(BCmib$Gene),
  `Cardio\ntoxicity` = na.omit(CTmib$Gene)
)

# Counts-only diagram; filename = NULL returns the grid object instead of
# writing an image file.
v <- venn.diagram(
  x,
  fill = c("deeppink", "lightblue"),
  alpha = c(0.5, 0.5),
  cat.cex = 2,
  cex = 2,
  filename = NULL
)

# Draw the default plot on a fresh page.
grid.newpage()
grid.draw(v)

# Same sets, with the gene names printed inside the regions.
ggvenn(
  data = x,
  text_size = 1,
  show_elements = TRUE,
  label_sep = "\n",
  fill_color = c("deeppink", "skyblue1")
)

# Keep the 75 rows with the largest absolute log2 value from each MIB table
# (top_n() returns more rows when there are ties at the cutoff).
top_75ct <- top_n(CTmib, n = 75, wt = abs(log2_CT))

top_75bc <- top_n(BCmib, n = 75, wt = abs(log2_BC))

# One row per gene that appears in either top-75 set, with logical flags for
# membership in the cardiotox set, the breast cancer set, and both.
mib_status <- data.frame(
  Gene = unique(c(top_75bc$Gene, top_75ct$Gene))
) %>%
  mutate(
    cardiotox = Gene %in% top_75ct$Gene,
    breastcancer = Gene %in% top_75bc$Gene,
    both = cardiotox & breastcancer
  )

#mib_status
# Jitter the points inside each TRUE/FALSE cell so the gene labels spread out.

ggplot(
  data = mib_status,
  mapping = aes(x = factor(breastcancer), y = factor(cardiotox))
) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0.3),
    shape = 21,
    size = 4,
    alpha = 0.5,
    colour = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    title = "Counts of Genes in Top 75 BC and CT MIB",
    x = "in_top_75bc",
    y = "in_top_75ct"
  ) +
  scale_x_discrete(labels = c("FALSE", "TRUE")) +
  scale_y_discrete(labels = c("FALSE", "TRUE")) +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  ) +
  geom_label_repel(
    mapping = aes(label = Gene),
    size = 2,
    max.overlaps = 200,
    segment.color = 'white',
    point.padding = 0.1,
    box.padding = 0.25
  )

breast cancer MIB vs RNA

# Gene sets for both diagrams: all breast cancer MIB genes (missing names
# removed) against the top-100 breast cancer RNA-seq genes. Previously only
# venn.diagram() received the NA-free MIB vector while ggvenn() got the raw
# column, so the two diagrams could disagree whenever a Gene entry was NA.
x <- list(
  `Breast\nCancer MIB` = na.omit(BCmib$Gene),
  `Breast\nCancer RNA` = top_100bc$Gene
)

# Counts-only diagram; filename = NULL returns the grid object instead of
# writing an image file.
v <- venn.diagram(
  x,
  fill = c("deeppink", "lightblue"),
  alpha = c(0.5, 0.5),
  cat.cex = 2,
  cex = 2,
  filename = NULL
)

# Draw the default plot on a fresh page.
grid.newpage()
grid.draw(v)

# Same sets, with the gene names printed inside the regions.
ggvenn(
  data = x,
  text_size = 1,
  show_elements = TRUE,
  label_sep = "\n",
  fill_color = c("deeppink", "skyblue1")
)

cardiotox MIB vs RNA

# Gene sets for both diagrams: all cardiotox MIB genes (missing names
# removed) against the top-100 cardiotox RNA-seq genes.
x <- list(
  `Cardiotox\n MIB` = na.omit(CTmib$Gene),
  `cardiotox\n RNA` = top_100ct$Gene
)

# Counts-only diagram; filename = NULL returns the grid object instead of
# writing an image file.
v <- venn.diagram(
  x,
  filename = NULL,
  fill = c("deeppink", "lightblue"),
  alpha = c(0.5, 0.5),
  cex = 2,
  cat.cex = 2
)

# Draw the default plot on a fresh page.
grid.newpage()
grid.draw(v)

# Same sets, with the gene names printed inside the regions.
ggvenn(
  data = x,
  fill_color = c("deeppink", "skyblue1"),
  show_elements = TRUE,
  label_sep = "\n",
  text_size = 1
)