This file combines four datasets: Breast Cancer MIB (BCmib), Breast Cancer RNASeq (BCrna), Cardiotox MIB (CTmib), and Cardiotox RNASeq (CTrna). The goal is to compare trametinib at 120 hrs across all assays.

Load libraries

Cardiotoxicity RNAseq data (CTrna)

# Read the cardiotoxicity RNA-seq table for trametinib and keep only the rows
# whose pval is below 0.05 and whose Mean.1 is not zero.
CTrna <- read.csv("tramet.csv") %>%
  filter(pval < .05, Mean.1 != 0)

# Keep column 1 plus columns 12 and 13; every other column is dropped.
# (After the merge further down, the two value columns are renamed CT_log2
# and CT_pval.)
CTrna <- CTrna[, c(1, 12:13)]

# Upper-case the gene names so they match the breast cancer data.
CTrna$Gene <- toupper(CTrna$Gene)

Cardiotoxicity MIB data (CTmib)

# Read the cardiotoxicity MIB workbook, keep columns 2, 3 and 39, and give
# them the names used by the rest of the analysis.
# Author's note on the data: every value is significant for at least one rep.
CTmib <- setNames(
  read_excel("MIB.xlsx")[, c(2, 3, 39)],
  c("Gene0", "Gene", "log2_CT")
)

Breast Cancer RNAseq data

# Load the breast cancer DESeq results table.
BCrna <- read.csv("BreastCancer_DESeq_Results.csv")

# Round the result columns (2 through 7) to 8 decimal places.
BCrna[, 2:7] <- round(BCrna[, 2:7], 8)

# Keep rows with padj < 0.05 and trim to columns 1, 3 and 7.
# which() is used so that rows with a missing padj are dropped: indexing with
# the raw logical vector would turn every NA into an all-NA row in the result.
BCrna <- BCrna[which(BCrna$padj < .05), c(1, 3, 7)]

Breast Cancer MIB data (BCmib)

# Breast cancer MIB data: read the normalized LFQ sheet of the workbook.
BCmib <- read_excel(
  "Copy of MCF10A_TimeCourse_Trametinib_proteinGroups_AW.xlsx",
  sheet = "LFQ normalized from Kinases tab"
)

# Keep the identifier column (1) and the three value columns (11 to 13).
BCmib <- BCmib[, c(1, 11:13)]

# Drop rows with no usable signal: rows whose values sum to zero once NAs are
# ignored, which includes rows that are entirely NA. The earlier pre-filter
# tested for NAs across every column of the untrimmed sheet and removed
# nothing that this check does not already remove, so it has been folded in.
BCmib <- BCmib[rowSums(BCmib[-1], na.rm = TRUE) != 0, ]

# Average the three retained columns and take log2 of that mean.
# NOTE(review): this is log2 of the mean value, not a fold change computed
# here - confirm the sheet already holds ratios if a log2 fold change is meant.
BCmib$mean <- rowMeans(BCmib[, 2:4], na.rm = TRUE)
BCmib$log2 <- log2(BCmib$mean)

# Trim the table down to the identifier and the log2 value, then rename.
BCmib <- BCmib[, c(names(BCmib)[1], "log2")]
colnames(BCmib) <- c("Gene", "log2_BC")

NEXT! Finding overlapping genes in the RNAseq data. dfr is the data frame for the RNA data.

# Join the two RNA-seq tables on gene name; merge() keeps only genes that are
# present in both tables. BC_pval holds the breast cancer padj column.
dfr <- merge(BCrna, CTrna, by = "Gene", suffixes = c("_BCrna", "_CTrna"))
colnames(dfr) <- c("Gene", "BC_log2", "BC_pval", "CT_log2", "CT_pval")

# Coerce the four value columns to numeric one column at a time.
# apply() on a data frame converts it to a matrix first; if any column is
# character, the numeric columns are passed through format() and can lose
# precision. Going through as.character() also keeps factor labels rather
# than factor codes. Entries that cannot be parsed become NA (with a warning).
dfr[2:5] <- lapply(dfr[2:5], function(col) as.numeric(as.character(col)))
## Warning in apply(dfr[, 2:5], 2, as.numeric): NAs introduced by coercion

Any relationships?

# Gene-by-gene scatter of the breast cancer log2 values against the
# cardiotoxicity log2 values for the merged RNA-seq table.
ggplot(data = dfr, mapping = aes(x = BC_log2, y = CT_log2)) +
  geom_point(
    shape = 21,
    size = 4,
    alpha = 0.2,
    color = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    x = "BC_log2",
    y = "CT_log2",
    title = "Scatter Plot of Breast Cancer and CardioTox RNASeq Log2 fold change"
  ) +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  )
## Warning: Removed 3 rows containing missing values (`geom_point()`).

# Same comparison as the log2 scatter, but for the two p-value columns.
ggplot(data = dfr, mapping = aes(x = BC_pval, y = CT_pval)) +
  geom_point(
    shape = 21,
    size = 6,
    alpha = 0.2,
    color = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    x = "BC_pval",
    y = "CT_pval",
    title = "Scatter Plot of Breast Cancer and CardioTox RNASeq pvals"
  ) +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  )

Volcano plots

# Volcano plot for the breast cancer columns: log2 value on the x axis,
# -log10 of the p-value column on the y axis.
p1 <- ggplot(data = dfr, mapping = aes(x = BC_log2, y = -log10(BC_pval))) +
  geom_point(
    shape = 21,
    size = 2,
    alpha = 0.3,
    color = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    x = "BC_log2",
    y = "-log10(BC_pval)",
    title = "Volcano Plot \nfor Breast Cancer RNAseq"
  ) +
  gghisto

# Volcano plot for the cardiotoxicity columns; the x axis is limited to
# [-5, 10], so points outside that range are not drawn.
p2 <- ggplot(data = dfr, mapping = aes(x = CT_log2, y = -log10(CT_pval))) +
  geom_point(
    shape = 21,
    size = 2,
    alpha = 0.3,
    color = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    x = "CT_log2",
    y = "-log10(CT_pval)",
    title = "Volcano Plot \nfor CardioTox RNAseq"
  ) +
  gghisto +
  xlim(-5, 10)

# Show the two volcano plots side by side.
grid.arrange(p1, p2, ncol = 2)
## Warning: Removed 3 rows containing missing values (`geom_point()`).

NOW bring it down to top 200 genes from each set

# Take up to 200 genes with the highest log2 value from each data set.
# na.last = NA removes genes whose value is missing from the ranking, and
# head() caps the selection at the rows actually available, so a table with
# fewer than 200 usable genes no longer produces all-NA rows.
# NOTE(review): this ranks by signed value, so only the most strongly
# increased genes are selected - confirm that decreased genes are not wanted.

# Top genes in CARDIOTOX
top_200ct <- dfr[
  head(order(dfr$CT_log2, decreasing = TRUE, na.last = NA), 200),
  c("Gene", "CT_log2")
]

# Top genes in BREAST CANCER
top_200bc <- dfr[
  head(order(dfr$BC_log2, decreasing = TRUE, na.last = NA), 200),
  c("Gene", "BC_log2")
]

# Genes that appear in both top lists
common_top200_gene <- intersect(top_200bc$Gene, top_200ct$Gene)

# Subset the merged table to the overlapping genes
dftop <- dfr[dfr$Gene %in% common_top200_gene, ]

# These are all the genes that occurred in the top 200 for both breast cancer
# and cardiotox (46 in total in the original run).

Replot the smaller group. Not much of interest.

# Labelled scatter of the genes shared by both top-200 lists.
ggplot(data = dftop, mapping = aes(x = BC_log2, y = CT_log2)) +
  geom_point(
    shape = 21,
    size = 4,
    alpha = 0.5,
    color = "violetred4",
    fill = "violetred1"
  ) +
  labs(
    x = "BC_log2",
    y = "CT_log2",
    title = "Scatter Plot of Breast Cancer and CardioTox RNASeq Log2 fold change"
  ) +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  ) +
  # Repelled gene-name labels; points with too many overlaps stay unlabelled.
  geom_label_repel(
    mapping = aes(label = Gene),
    size = 4,
    max.overlaps = 20,
    segment.color = "grey50",
    point.padding = 0.1,
    box.padding = 0.25
  )
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Venn Diagram showing differences

library(VennDiagram)
## Loading required package: grid
## Loading required package: futile.logger
## 
## Attaching package: 'VennDiagram'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
# Build a two-set Venn diagram of the top-200 gene lists. With
# filename = NULL the diagram is kept in `v` and drawn below.
v <- venn.diagram(
  x = list(
    `Breast \nCancer` = top_200bc$Gene,
    `Cardio\ntoxicity` = top_200ct$Gene
  ),
  filename = NULL,
  fill = c("deeppink", "lightblue"),
  alpha = c(0.5, 0.5),
  cex = 2,
  cat.cex = 2
)

# Start a fresh page and draw the diagram on it.
grid.newpage()
grid.draw(v)

trying to make a venn diagram style list

# One row for every gene that appears in either top-200 list.
gene_status <- data.frame(Gene = unique(c(top_200bc$Gene, top_200ct$Gene)))

# Flag which list(s) each gene belongs to, then mark the genes found in both.
gene_status <- transform(
  gene_status,
  cardiotox = Gene %in% top_200ct$Gene,
  breastcancer = Gene %in% top_200bc$Gene
)
gene_status$both <- gene_status$cardiotox & gene_status$breastcancer

# Print the membership table.
gene_status

attempt to plot it

library(ggdist)

# Spread the genes out with jitter inside each TRUE/FALSE membership cell.
# The point layer and the label layer use the same seeded jitter so that each
# label is attached to its own point instead of the un-jittered cell centre.
# `size` is now given once to geom_point (it was passed twice, which raised
# the "Duplicated aesthetics" warning).

ggplot(gene_status, aes(x = factor(breastcancer), y = factor(cardiotox))) +
  geom_point(
    position = position_jitter(width = 0.2, height = 0.3, seed = 1),
    size = 4,
    fill = "violetred1",
    color = "violetred4",
    alpha = 0.5,
    shape = 21
  ) +
  labs(
    x = "in_top_200bc",
    y = "in_top_200ct",
    title = "Counts of Genes in Top 200 BC and CT"
  ) +
  scale_x_discrete(labels = c("FALSE", "TRUE")) +
  scale_y_discrete(labels = c("FALSE", "TRUE")) +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  ) +
  geom_label_repel(
    aes(label = Gene),
    position = position_jitter(width = 0.2, height = 0.3, seed = 1),
    box.padding = 0.25,
    point.padding = 0.1,
    segment.color = "white",
    max.overlaps = 100,
    size = 2
  )
## Warning: Duplicated aesthetics after name standardisation: size
## Warning: ggrepel: 308 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps


On to the MIB data! Find overlapping significant genes.

# Join the two MIB tables on gene name and keep the gene plus the two log2
# columns (the extra Gene0 identifier from CTmib is dropped).
# NOTE(review): the merge is case-sensitive and, unlike CTrna, the CTmib gene
# names are not upper-cased before merging - confirm the two tables already
# use the same case.
dfm <- merge(BCmib, CTmib, by = "Gene")[, c("Gene", "log2_BC", "log2_CT")]

# Make sure both value columns are numeric before plotting. This replaces a
# copied line that re-coerced the RNA table (dfr) instead of dfm.
dfm[c("log2_BC", "log2_CT")] <- lapply(
  dfm[c("log2_BC", "log2_CT")],
  function(col) as.numeric(as.character(col))
)

# Labelled scatter of the breast cancer vs. cardiotoxicity MIB log2 values.
ggplot(dfm, aes(x = log2_BC, y = log2_CT)) +
  geom_point(
    fill = "violetred1",
    color = "violetred4",
    alpha = 0.6,
    size = 6,
    shape = 21
  ) +
  labs(
    x = "BC_log2",
    y = "CT_log2",
    title = "Scatter Plot of Breast Cancer and CardioTox MIB Log2 fold change"
  ) +
  gghisto +
  theme(
    panel.background = element_rect(fill = "lightsteelblue1"),
    legend.position = "none"
  ) +
  geom_label_repel(
    aes(label = Gene),
    box.padding = 0.25,
    point.padding = 0.1,
    segment.color = "grey50",
    max.overlaps = 20,
    size = 3
  )