library(reshape2)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(viridis)
## Loading required package: viridisLite
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
library(pheatmap)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.4     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ plotly::filter()      masks dplyr::filter(), stats::filter()
## ✖ data.table::first()   masks dplyr::first()
## ✖ lubridate::hour()     masks data.table::hour()
## ✖ lubridate::isoweek()  masks data.table::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ data.table::last()    masks dplyr::last()
## ✖ lubridate::mday()     masks data.table::mday()
## ✖ lubridate::minute()   masks data.table::minute()
## ✖ lubridate::month()    masks data.table::month()
## ✖ lubridate::quarter()  masks data.table::quarter()
## ✖ lubridate::second()   masks data.table::second()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ lubridate::wday()     masks data.table::wday()
## ✖ lubridate::week()     masks data.table::week()
## ✖ lubridate::yday()     masks data.table::yday()
## ✖ lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(clipr)
## Welcome to clipr. See ?write_clip for advisories on writing to the clipboard in R.
library(tidyr)
library(Rcpp)

COLORS

mycolors<-c(viridis(15))
felix_cols<-mycolors[c(5,2)]
felix_4cols<-mycolors[c(15,10,8,2)]
plain_cols1<-c("blue","gray")
plain_cols2<-c("red","gray")

pats_cols<-colorRampPalette(c("#FDE725FF", "white","#440154FF"))(21)
leos_cols<-colorRampPalette(c("white","blue"))(10)

LOAD DATA AND MAKE AN OVERALL HEATMAP

## load the dataset (cells treated with COVID)
cancer<-read_csv(file="breast_cancer_cells.csv")
## Rows: 5144 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): Gene_Symbol, Description
## dbl (15): Peptides, MCF10A_1, MCF10A_2, MCF7_1, MCF7_2, MDA231_1, MDA231_2, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#make a matrix of just raw the data

cancer_mat1<-cancer %>% 
  select(MCF10A_1:SKBR3_2) %>%
  as.matrix() %>%
  round(.,2)
head(cancer_mat1)
##      MCF10A_1 MCF10A_2 MCF7_1 MCF7_2 MDA231_1 MDA231_2 MDA468_1 MDA468_2
## [1,]     9.54     4.58   5.07   5.42    25.43    27.42     4.56     3.88
## [2,]    14.00    11.58   6.49   6.64     9.80    10.31     6.84     8.75
## [3,]    10.22     8.29  11.55  10.82    12.48    10.11     9.54     8.46
## [4,]     9.00     6.35  13.40  14.82    14.94    11.33     6.87     7.92
## [5,]     8.21     4.44  12.08   9.82    16.51    12.34    15.43     8.50
## [6,]    12.51    15.84   8.05   8.38     6.78     8.46     4.48     7.18
##      SKBR3_1 SKBR3_2
## [1,]    8.46    5.64
## [2,]   11.91   13.67
## [3,]    9.10    9.43
## [4,]    8.54    6.82
## [5,]    7.93    4.75
## [6,]   13.27   15.05

Data Manipulation

## first, make new columns that combine the two reps for each variable by averaging them  

cancer_processed <-cancer %>%
  mutate(mean_control = ((MCF10A_1 + MCF10A_2)/2),
  mean_MCF7= ((MCF7_1 + MCF7_2)/2), 
  mean_MDA231= ((MDA231_1 + MDA231_2)/2), 
  mean_MDA468= ((MDA468_1 + MDA468_2)/2),
  mean_SKBR3= ((SKBR3_1 + SKBR3_2)/2)) 
 
head(cancer_processed)
## # A tibble: 6 × 22
##   Gene_Symbol Description      Peptides MCF10A_1 MCF10A_2 MCF7_1 MCF7_2 MDA231_1
##   <chr>       <chr>               <dbl>    <dbl>    <dbl>  <dbl>  <dbl>    <dbl>
## 1 NES         Nestin OS=Homo …        7     9.54     4.58   5.07   5.42    25.4 
## 2 ZC3HC1      Isoform 2 of Nu…        2    14       11.6    6.49   6.64     9.8 
## 3 CAMSAP1L1   Isoform 2 of Ca…        5    10.2      8.29  11.6   10.8     12.5 
## 4 COBRA1      Negative elonga…        3     9        6.35  13.4   14.8     14.9 
## 5 HAUS6       Isoform 3 of HA…        5     8.21     4.44  12.1    9.82    16.5 
## 6 CHCHD4      Isoform 2 of Mi…        5    12.5     15.8    8.05   8.38     6.78
## # ℹ 14 more variables: MDA231_2 <dbl>, MDA468_1 <dbl>, MDA468_2 <dbl>,
## #   SKBR3_1 <dbl>, SKBR3_2 <dbl>, pvalue_MCF7_vs_MCF10A <dbl>,
## #   pvalue_MDA231_vs_MCF10A <dbl>, pvalue_MDA468_vs_MCF10A <dbl>,
## #   pvalue_SKBR3_vs_MCF10A <dbl>, mean_control <dbl>, mean_MCF7 <dbl>,
## #   mean_MDA231 <dbl>, mean_MDA468 <dbl>, mean_SKBR3 <dbl>
#did it create your new columns?
cancer_processed <- cancer_processed %>% 
  mutate(log_MCF7 = log2(mean_MCF7/mean_control), 
         log_MDA231 = log2(mean_MDA231/mean_control),
         log_MDA468 = log2(mean_MDA468/mean_control),
         log_SKBR3 = log2(mean_SKBR3/mean_control))
head(cancer_processed)
## # A tibble: 6 × 26
##   Gene_Symbol Description      Peptides MCF10A_1 MCF10A_2 MCF7_1 MCF7_2 MDA231_1
##   <chr>       <chr>               <dbl>    <dbl>    <dbl>  <dbl>  <dbl>    <dbl>
## 1 NES         Nestin OS=Homo …        7     9.54     4.58   5.07   5.42    25.4 
## 2 ZC3HC1      Isoform 2 of Nu…        2    14       11.6    6.49   6.64     9.8 
## 3 CAMSAP1L1   Isoform 2 of Ca…        5    10.2      8.29  11.6   10.8     12.5 
## 4 COBRA1      Negative elonga…        3     9        6.35  13.4   14.8     14.9 
## 5 HAUS6       Isoform 3 of HA…        5     8.21     4.44  12.1    9.82    16.5 
## 6 CHCHD4      Isoform 2 of Mi…        5    12.5     15.8    8.05   8.38     6.78
## # ℹ 18 more variables: MDA231_2 <dbl>, MDA468_1 <dbl>, MDA468_2 <dbl>,
## #   SKBR3_1 <dbl>, SKBR3_2 <dbl>, pvalue_MCF7_vs_MCF10A <dbl>,
## #   pvalue_MDA231_vs_MCF10A <dbl>, pvalue_MDA468_vs_MCF10A <dbl>,
## #   pvalue_SKBR3_vs_MCF10A <dbl>, mean_control <dbl>, mean_MCF7 <dbl>,
## #   mean_MDA231 <dbl>, mean_MDA468 <dbl>, mean_SKBR3 <dbl>, log_MCF7 <dbl>,
## #   log_MDA231 <dbl>, log_MDA468 <dbl>, log_SKBR3 <dbl>
cancer_mat2<-cancer_processed %>% 
  select(log_MCF7:log_SKBR3) %>%
  as.matrix() %>%
  round(.,2)
head(cancer_mat2)
##      log_MCF7 log_MDA231 log_MDA468 log_SKBR3
## [1,]    -0.43       1.90      -0.74         0
## [2,]    -0.96      -0.35      -0.71         0
## [3,]     0.27       0.29      -0.04         0
## [4,]     0.88       0.78      -0.05         0
## [5,]     0.79       1.19       0.92         0
## [6,]    -0.79      -0.90      -1.28         0
pheatmap(cancer_mat2, color=pats_cols,cellwidth=30,cellheight=.03,cluster_cols=FALSE,cluster_rows=TRUE,legend=TRUE,fontsize = 7,scale="column")

#BASIC VOLCANO PLOT

## Add a column that stores the negative log of the pvalue of interest (in this case, Virus_24h)
cancer_processed<-cancer_processed%>%
  mutate(neglog_test1=-log10(pvalue_MCF7_vs_MCF10A),            
         neglog_test2=-log10(pvalue_MDA231_vs_MCF10A),
         neglog_test3=-log10(pvalue_MDA468_vs_MCF10A),
         neglog_test4=-log10(pvalue_SKBR3_vs_MCF10A))

## Use ggplot to plot the log ratio at 24 hours against the -log p-value at 24 hours
volcano_plot<- cancer_processed %>% ggplot(aes(x=log_MCF7,y=neglog_test1,description=Gene_Symbol))+
  geom_point(alpha=0.7,color="blue")

#to view it, type: 
volcano_plot

## BETTER VOLCANO PLOT

## Define the significant ones (by using ifelse and setting # parameters) so they can be colored
cancer_processed <-cancer_processed %>% mutate(significance=ifelse((log_MCF7>5 & neglog_test1>2.99),"UP", ifelse((log_MCF7<c(-5) & neglog_test1>2.99),"DOWN","NOT SIG")))

## Some standard colors
plain_cols3<-c("red","gray","blue")

## volcano plot as before with some added things
better_volcano_plot<- cancer_processed %>% ggplot(aes(x=log_MCF7,y=neglog_test1,description=Gene_Symbol,color=significance))+
  geom_point(alpha=0.7)+
  scale_color_manual(values=plain_cols3)+
  xlim(-6,6)+
  theme_bw()+
  theme(axis.text = element_text(colour = "black",size=14))+
  theme(text = element_text(size=14))+
  labs(x="log ratio of MCF7 cell line compared to MCF10A control",y="-log(p-value)")
  
#to view it, type
better_volcano_plot
## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).

#check viewer and / or plots to see it 
ggplotly(better_volcano_plot)
# Make a pivot longer table of the C19 data for Control_2h_1:Virus_24h_2
cancer_long<-pivot_longer(cancer_processed, cols = c(MCF10A_1:SKBR3_2), names_to = 'variable')%>%
  select(-c(pvalue_MCF7_vs_MCF10A:significance))

head(cancer_long)
## # A tibble: 6 × 5
##   Gene_Symbol Description                             Peptides variable value
##   <chr>       <chr>                                      <dbl> <chr>    <dbl>
## 1 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF10A_1  9.54
## 2 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF10A_2  4.58
## 3 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF7_1    5.07
## 4 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MCF7_2    5.42
## 5 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MDA231_1 25.4 
## 6 NES         Nestin OS=Homo sapiens GN=NES PE=1 SV=2        7 MDA231_2 27.4
Examples_Down<-cancer_long %>% 
  filter(Gene_Symbol=="APOA1" | Gene_Symbol=="HLA-A") 

head(Examples_Down)
## # A tibble: 6 × 5
##   Gene_Symbol Description                                Peptides variable value
##   <chr>       <chr>                                         <dbl> <chr>    <dbl>
## 1 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF10A_1  5.42
## 2 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF10A_2  6.62
## 3 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF7_1    2.22
## 4 HLA-A       HLA class I histocompatibility antigen, A…        3 MCF7_2    2.69
## 5 HLA-A       HLA class I histocompatibility antigen, A…        3 MDA231_1  4.56
## 6 HLA-A       HLA class I histocompatibility antigen, A…        3 MDA231_2  4.23
## make barplots facetted by Gene Symbol (when working with other data sets - change x=order to x = variable)
Example_plot_down<-Examples_Down %>% 
  ggplot(aes(x=factor(variable,levels=c('MCF10A_1','MCF10A_2','MCF7_1','MCF7_2','MDA231_1','MDA231_2','MDA468_1','MDA468_2','SKBR3_1','SKBR3_2')),y=value))+ 
  geom_bar(stat="identity",fill="red")+
  facet_wrap(~Gene_Symbol)+
  theme_bw()+
  theme(axis.text = element_text(colour = "black",size=10))+
  theme(text = element_text(size=14))+
  theme(axis.text.x = element_text(angle=45, hjust=1))+
  labs(x="sample",y="relative intensity")
  
Example_plot_down

Examples_Up<-cancer_long %>% 
  filter(Gene_Symbol=="FBP2" | Gene_Symbol=="DICER1" | Gene_Symbol=="C17orf28") 

head(Examples_Up)
## # A tibble: 6 × 5
##   Gene_Symbol Description                                Peptides variable value
##   <chr>       <chr>                                         <dbl> <chr>    <dbl>
## 1 FBP2        Fructose-1,6-bisphosphatase isozyme 2 OS=…        2 MCF10A_1  1.13
## 2 FBP2        Fructose-1,6-bisphosphatase isozyme 2 OS=…        2 MCF10A_2  0.76
## 3 FBP2        Fructose-1,6-bisphosphatase isozyme 2 OS=…        2 MCF7_1   43.9 
## 4 FBP2        Fructose-1,6-bisphosphatase isozyme 2 OS=…        2 MCF7_2   41.4 
## 5 FBP2        Fructose-1,6-bisphosphatase isozyme 2 OS=…        2 MDA231_1  2.24
## 6 FBP2        Fructose-1,6-bisphosphatase isozyme 2 OS=…        2 MDA231_2  1.7
## make barplots facetted by Gene Symbol (when working with other data sets - change x=order to x = variable)
Example_plot_Up<-Examples_Up %>% 
  ggplot(aes(x=factor(variable,levels=c('MCF10A_1','MCF10A_2','MCF7_1','MCF7_2','MDA231_1','MDA231_2','MDA468_1','MDA468_2','SKBR3_1','SKBR3_2')),y=value))+ 
  geom_bar(stat="identity",fill="royalblue")+
  facet_wrap(~Gene_Symbol)+
  theme_bw()+
  theme(axis.text = element_text(colour = "black",size=10))+
  theme(text = element_text(size=14))+
  theme(axis.text.x = element_text(angle=45, hjust=1))+
  labs(x="sample",y="relative intensity")
  
Example_plot_Up