So, lets load RMarkdown

library(rmarkdown)

set messages to FALSE on everything (prevents certain boring things from being shown in the results)

knitr::opts_chunk$set(echo = TRUE, message=FALSE,warning=FALSE,collapse = TRUE)

PACKAGES

library(reshape2)
library(ggplot2)
library(dplyr)
library(plotly)
library(viridis)
library(data.table)
library(pheatmap)
library(tidyverse)
library(ggthemes)
library(clipr)
library(tidyr)
library(Rcpp)

COLORS

mycolors<-c(viridis(15))
felix_cols<-mycolors[c(5,2)]
felix_4cols<-mycolors[c(15,10,8,2)]
plain_cols1<-c("blue","gray")
plain_cols2<-c("red","gray")

pats_cols<-colorRampPalette(c("#FDE725FF", "white","#440154FF"))(21)
leos_cols<-colorRampPalette(c("white","blue"))(10)

LOAD DATA AND MAKE AN OVERALL HEATMAP

## load the dataset (cells treated with COVID)
bc_data<-read_csv(file="breast_cancer_cells.csv")

## make some new columns that store the log ratios  
bc_data<-bc_data %>% mutate(
  log_MCF7=log2((MCF7_1+MCF7_2)/(MCF10A_1+MCF10A_2)), 
  log_MDA231=log2((MDA231_1+MDA231_2)/(MCF10A_1+MCF10A_2)), 
  log_MDA468=log2((MDA468_1+MDA468_2)/(MCF10A_1+MCF10A_2)),
  log_SKBR3=log2((SKBR3_1+SKBR3_2)/(MCF10A_1+MCF10A_2))) 

#control is MCF10A 
#naming each new column based on names of cell lines that we are comparing
## make a matrix of just the data
bc_mat1<-bc_data %>% select(log_MCF7:log_SKBR3) %>% as.matrix() %>% round(.,2)
#changed name for bc_mat1
## make a heatmap from the data
pheatmap(bc_mat1, color=pats_cols,cellwidth=30,cellheight=.03,cluster_cols=FALSE,cluster_rows=FALSE,
                    legend=TRUE,fontsize = 7,scale="column")


#changed so that the heatmap is made using bc_mat1

PLOTTING A VOLCANO

##BASIC VOLCANO PLOT

## Add a column that stores the negative log of the pvalue
bc_data<-bc_data %>% mutate(neglog_SKBR3=-log10(pvalue_SKBR3_vs_MCF10A))

#changed 24hr to SKBR3 to create negative log column, I could have picked any of the 4 cell lines but I picked SKBR3

## Use ggplot to plot the log ratio at 24 hours against the -log p-value at 24 hours
volcano_plot<-bc_data %>% ggplot(aes(x=log_SKBR3,y=neglog_SKBR3,description=Gene_Symbol))+
  geom_point(alpha=0.7,color="blue")

#changed values to make a volcano plot with negative and positive logs of SKBR3 
volcano_plot


  
## BETTER VOLCANO PLOT

## Define the siginificant ones so they can be colored
bc_data<-bc_data %>% mutate(significance=ifelse((log_SKBR3>2 & neglog_SKBR3>2.99),"UP", ifelse((log_SKBR3<c(-2) & neglog_SKBR3>2.99),"DOWN","NOT SIG")))

## Some standard colors
plain_cols3<-c("green","gray","pink")
# came back to change the colours on my volcano plot to match the colours I used for the bar graphs at the end of this

## volcano plot as before with some added things

better_volcano_plot<-bc_data %>% ggplot(aes(x=log_SKBR3,y=neglog_SKBR3,description=Gene_Symbol,color=significance))+
  geom_point(alpha=0.7)+
  scale_color_manual(values=plain_cols3)+
  xlim(-6,6)+
  theme_bw()+
  theme(axis.text = element_text(colour = "black",size=14))+
  theme(text = element_text(size=14))+
  labs(x="log ratio for SKBR3 compared to non tumorogenic MCF10A",y="-log(p-value)")
  

better_volcano_plot


#changed values and title of x axis, left significance at 2 and 2.99 values
## use ggplotly to see hover over the points to see the gene names
ggplotly(better_volcano_plot)

Barplots of significant points of interest

## EXAMPLES OF A COUPLE PROTEINS or GENES

# Make a pivot longer table of the C19 data for Control_2h_1:Virus_24h_2
bc_data_long<-pivot_longer(bc_data, cols = c(MCF10A_1:SKBR3_2), names_to = 'variable')%>% select(-c(pvalue_MCF7_vs_MCF10A:significance))%>%select(-Description)

#removed "description" from long table, added information from bc_data

## Because ggplot will automatically plot alphabetically on the x-axis, we need to make sure that our timepoints will be in the proper order by adding a new column using the factor() function

#bc_data_long$order <- as.character(bc_data_long$variable)
#bc_data_long$order <- factor(bc_data_long$order,levels=c('','Control_2h_2','Virus_2h_1','Virus_2h_2','Virus_6h_1','Virus_6h_2','Virus_10h_1','Virus_10h_2','Virus_24h_1','Virus_24h_2'))
#heathyr said I didn't need to make this code because our variables don't need to be organized as a function of time


## based on the gene symbols from plotly, select a few proteins from the table. Select just the data and gene symbols from the pivot longer table you just made 
Examples_Down<-bc_data_long %>% filter(Gene_Symbol=="APOA1" | Gene_Symbol=="HLA-A" | Gene_Symbol=="MYO1B" | Gene_Symbol=="HMGN5") 
  
## make barplots facetted by Gene Symbol
Examples_Down %>% 
  ggplot(aes(x=variable,y=value))+ 
  geom_bar(stat="identity",fill="green")+
  facet_wrap(~Gene_Symbol)+
  theme_bw()+
  theme(axis.text = element_text(colour = "black",size=10))+
  theme(text = element_text(size=14))+
  theme(axis.text.x = element_text(angle=45, hjust=1))+
  labs(x="sample",y="relative intensity")


#changed colours to green
#changes X=order to X=variable because X=order did not exist in my long table
## Same process for the upregulated ones
Examples_Up<-bc_data_long %>% filter(Gene_Symbol=="GCLC" | Gene_Symbol=="FNBP1L" | Gene_Symbol=="DENNd4C"| Gene_Symbol=="KRT23" | Gene_Symbol=="TDP2")

#added all of my down values from the volcano plot, template only had three "gene_symbols" but my graph had  5 so I had to add a few

Examples_Up %>% ggplot(aes(x=variable,y=value))+ 
  geom_bar(stat="identity",fill="pink")+
  facet_wrap(~Gene_Symbol)+
  theme_bw()+
  theme(axis.text = element_text(colour = "black",size=10))+
  theme(text = element_text(size=14))+
  theme(axis.text.x = element_text(angle=45, hjust=1))+
  labs(x="sample",y="relative intensity")


#changed colour to pink
#changed X=order to X= variable because the X=order did no exist on my bc_data_long table