library(reshape2)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(viridis)
## Loading required package: viridisLite
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
library(pheatmap)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ data.table::first() masks dplyr::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ data.table::last() masks dplyr::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(clipr)
## Welcome to clipr. See ?write_clip for advisories on writing to the clipboard in R.
library(tidyr)
library(Rcpp)
COLORS
mycolors<-c(viridis(15))
felix_cols<-mycolors[c(5,2)]
felix_4cols<-mycolors[c(15,10,8,2)]
plain_cols1<-c("blue","gray")
plain_cols2<-c("red","gray")
pats_cols<-colorRampPalette(c("#FDE725FF", "white","#440154FF"))(21)
leos_cols<-colorRampPalette(c("white","blue"))(10)
LOAD DATA AND MAKE AN OVERALL HEATMAP
## load the dataset (cells treated with COVID)
cancer<-read_csv(file="breast_cancer_cells.csv")
## Rows: 5144 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Gene_Symbol, Description
## dbl (15): Peptides, MCF10A_1, MCF10A_2, MCF7_1, MCF7_2, MDA231_1, MDA231_2, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#make a matrix of just raw the data
cancer_mat1<-cancer %>%
select(MCF10A_1:SKBR3_2) %>%
as.matrix() %>%
round(.,2)
head(cancer_mat1)
## MCF10A_1 MCF10A_2 MCF7_1 MCF7_2 MDA231_1 MDA231_2 MDA468_1 MDA468_2
## [1,] 9.54 4.58 5.07 5.42 25.43 27.42 4.56 3.88
## [2,] 14.00 11.58 6.49 6.64 9.80 10.31 6.84 8.75
## [3,] 10.22 8.29 11.55 10.82 12.48 10.11 9.54 8.46
## [4,] 9.00 6.35 13.40 14.82 14.94 11.33 6.87 7.92
## [5,] 8.21 4.44 12.08 9.82 16.51 12.34 15.43 8.50
## [6,] 12.51 15.84 8.05 8.38 6.78 8.46 4.48 7.18
## SKBR3_1 SKBR3_2
## [1,] 8.46 5.64
## [2,] 11.91 13.67
## [3,] 9.10 9.43
## [4,] 8.54 6.82
## [5,] 7.93 4.75
## [6,] 13.27 15.05
Data Manipulation
## first, make new columns that combine the two reps for each variable by averaging them
cancer_processed <-cancer %>%
mutate(mean_control = ((MCF10A_1 + MCF10A_2)/2),
mean_MCF7= ((MCF7_1 + MCF7_2)/2),
mean_MDA231= ((MDA231_1 + MDA231_2)/2),
mean_MDA468= ((MDA468_1 + MDA468_2)/2),
mean_SKBR3= ((SKBR3_1 + SKBR3_2)/2))
head(cancer_processed)
## # A tibble: 6 × 22
## Gene_Symbol Description Peptides MCF10A_1 MCF10A_2 MCF7_1 MCF7_2 MDA231_1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NES Nestin OS=Homo … 7 9.54 4.58 5.07 5.42 25.4
## 2 ZC3HC1 Isoform 2 of Nu… 2 14 11.6 6.49 6.64 9.8
## 3 CAMSAP1L1 Isoform 2 of Ca… 5 10.2 8.29 11.6 10.8 12.5
## 4 COBRA1 Negative elonga… 3 9 6.35 13.4 14.8 14.9
## 5 HAUS6 Isoform 3 of HA… 5 8.21 4.44 12.1 9.82 16.5
## 6 CHCHD4 Isoform 2 of Mi… 5 12.5 15.8 8.05 8.38 6.78
## # ℹ 14 more variables: MDA231_2 <dbl>, MDA468_1 <dbl>, MDA468_2 <dbl>,
## # SKBR3_1 <dbl>, SKBR3_2 <dbl>, pvalue_MCF7_vs_MCF10A <dbl>,
## # pvalue_MDA231_vs_MCF10A <dbl>, pvalue_MDA468_vs_MCF10A <dbl>,
## # pvalue_SKBR3_vs_MCF10A <dbl>, mean_control <dbl>, mean_MCF7 <dbl>,
## # mean_MDA231 <dbl>, mean_MDA468 <dbl>, mean_SKBR3 <dbl>
#did it create your new columns?
cancer_processed <- cancer_processed %>%
mutate(log_MCF7 = log2(mean_MCF7/mean_control),
log_MDA231 = log2(mean_MDA231/mean_control),
log_MDA468 = log2(mean_MDA468/mean_control),
log_SKBR3 = log2(mean_SKBR3/mean_control))
head(cancer_processed)
## # A tibble: 6 × 26
## Gene_Symbol Description Peptides MCF10A_1 MCF10A_2 MCF7_1 MCF7_2 MDA231_1
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NES Nestin OS=Homo … 7 9.54 4.58 5.07 5.42 25.4
## 2 ZC3HC1 Isoform 2 of Nu… 2 14 11.6 6.49 6.64 9.8
## 3 CAMSAP1L1 Isoform 2 of Ca… 5 10.2 8.29 11.6 10.8 12.5
## 4 COBRA1 Negative elonga… 3 9 6.35 13.4 14.8 14.9
## 5 HAUS6 Isoform 3 of HA… 5 8.21 4.44 12.1 9.82 16.5
## 6 CHCHD4 Isoform 2 of Mi… 5 12.5 15.8 8.05 8.38 6.78
## # ℹ 18 more variables: MDA231_2 <dbl>, MDA468_1 <dbl>, MDA468_2 <dbl>,
## # SKBR3_1 <dbl>, SKBR3_2 <dbl>, pvalue_MCF7_vs_MCF10A <dbl>,
## # pvalue_MDA231_vs_MCF10A <dbl>, pvalue_MDA468_vs_MCF10A <dbl>,
## # pvalue_SKBR3_vs_MCF10A <dbl>, mean_control <dbl>, mean_MCF7 <dbl>,
## # mean_MDA231 <dbl>, mean_MDA468 <dbl>, mean_SKBR3 <dbl>, log_MCF7 <dbl>,
## # log_MDA231 <dbl>, log_MDA468 <dbl>, log_SKBR3 <dbl>
cancer_mat2<-cancer_processed %>%
select(log_MCF7:log_SKBR3) %>%
as.matrix() %>%
round(.,2)
head(cancer_mat2)
## log_MCF7 log_MDA231 log_MDA468 log_SKBR3
## [1,] -0.43 1.90 -0.74 0
## [2,] -0.96 -0.35 -0.71 0
## [3,] 0.27 0.29 -0.04 0
## [4,] 0.88 0.78 -0.05 0
## [5,] 0.79 1.19 0.92 0
## [6,] -0.79 -0.90 -1.28 0
pheatmap(cancer_mat2, color=pats_cols,cellwidth=30,cellheight=.03,cluster_cols=FALSE,cluster_rows=TRUE,legend=TRUE,fontsize = 7,scale="column")

#BASIC VOLCANO PLOT
## Add a column that stores the negative log of the pvalue of interest (in this case, Virus_24h)
cancer_processed<-cancer_processed%>%
mutate(neglog_test1=-log10(pvalue_MCF7_vs_MCF10A),
neglog_test2=-log10(pvalue_MDA231_vs_MCF10A),
neglog_test3=-log10(pvalue_MDA468_vs_MCF10A),
neglog_test4=-log10(pvalue_SKBR3_vs_MCF10A))
## Use ggplot to plot the log ratio at 24 hours against the -log p-value at 24 hours
volcano_plot<- cancer_processed %>% ggplot(aes(x=log_MCF7,y=neglog_test1,description=Gene_Symbol))+
geom_point(alpha=0.7,color="blue")
#to view it, type:
volcano_plot

## BETTER VOLCANO PLOT
## Define the significant ones (by using ifelse and setting # parameters) so they can be colored
cancer_processed <-cancer_processed %>% mutate(significance=ifelse((log_MCF7>5 & neglog_test1>2.99),"UP", ifelse((log_MCF7<c(-5) & neglog_test1>2.99),"DOWN","NOT SIG")))
## Some standard colors
plain_cols3<-c("red","gray","blue")
## volcano plot as before with some added things
better_volcano_plot<- cancer_processed %>% ggplot(aes(x=log_MCF7,y=neglog_test1,description=Gene_Symbol,color=significance))+
geom_point(alpha=0.7)+
scale_color_manual(values=plain_cols3)+
xlim(-6,6)+
theme_bw()+
theme(axis.text = element_text(colour = "black",size=14))+
theme(text = element_text(size=14))+
labs(x="log ratio of MCF7 cell line compared to MCF10A control",y="-log(p-value)")
#to view it, type
better_volcano_plot
## Warning: Removed 18 rows containing missing values or values outside the scale range
## (`geom_point()`).

#check viewer and / or plots to see it
ggplotly(better_volcano_plot)
# Make a pivot longer table of the C19 data for Control_2h_1:Virus_24h_2
cancer_long<-pivot_longer(cancer_processed, cols = c(MCF10A_1:SKBR3_2), names_to = 'variable')%>%
select(-c(pvalue_MCF7_vs_MCF10A:significance))
head(cancer_long)
## # A tibble: 6 × 5
## Gene_Symbol Description Peptides variable value
## <chr> <chr> <dbl> <chr> <dbl>
## 1 NES Nestin OS=Homo sapiens GN=NES PE=1 SV=2 7 MCF10A_1 9.54
## 2 NES Nestin OS=Homo sapiens GN=NES PE=1 SV=2 7 MCF10A_2 4.58
## 3 NES Nestin OS=Homo sapiens GN=NES PE=1 SV=2 7 MCF7_1 5.07
## 4 NES Nestin OS=Homo sapiens GN=NES PE=1 SV=2 7 MCF7_2 5.42
## 5 NES Nestin OS=Homo sapiens GN=NES PE=1 SV=2 7 MDA231_1 25.4
## 6 NES Nestin OS=Homo sapiens GN=NES PE=1 SV=2 7 MDA231_2 27.4
Examples_Down<-cancer_long %>%
filter(Gene_Symbol=="APOA1" | Gene_Symbol=="HLA-A")
head(Examples_Down)
## # A tibble: 6 × 5
## Gene_Symbol Description Peptides variable value
## <chr> <chr> <dbl> <chr> <dbl>
## 1 HLA-A HLA class I histocompatibility antigen, A… 3 MCF10A_1 5.42
## 2 HLA-A HLA class I histocompatibility antigen, A… 3 MCF10A_2 6.62
## 3 HLA-A HLA class I histocompatibility antigen, A… 3 MCF7_1 2.22
## 4 HLA-A HLA class I histocompatibility antigen, A… 3 MCF7_2 2.69
## 5 HLA-A HLA class I histocompatibility antigen, A… 3 MDA231_1 4.56
## 6 HLA-A HLA class I histocompatibility antigen, A… 3 MDA231_2 4.23
## make barplots facetted by Gene Symbol (when working with other data sets - change x=order to x = variable)
Example_plot_down<-Examples_Down %>%
ggplot(aes(x=factor(variable,levels=c('MCF10A_1','MCF10A_2','MCF7_1','MCF7_2','MDA231_1','MDA231_2','MDA468_1','MDA468_2','SKBR3_1','SKBR3_2')),y=value))+
geom_bar(stat="identity",fill="red")+
facet_wrap(~Gene_Symbol)+
theme_bw()+
theme(axis.text = element_text(colour = "black",size=10))+
theme(text = element_text(size=14))+
theme(axis.text.x = element_text(angle=45, hjust=1))+
labs(x="sample",y="relative intensity")
Example_plot_down

Examples_Up<-cancer_long %>%
filter(Gene_Symbol=="FBP2" | Gene_Symbol=="DICER1" | Gene_Symbol=="C17orf28")
head(Examples_Up)
## # A tibble: 6 × 5
## Gene_Symbol Description Peptides variable value
## <chr> <chr> <dbl> <chr> <dbl>
## 1 FBP2 Fructose-1,6-bisphosphatase isozyme 2 OS=… 2 MCF10A_1 1.13
## 2 FBP2 Fructose-1,6-bisphosphatase isozyme 2 OS=… 2 MCF10A_2 0.76
## 3 FBP2 Fructose-1,6-bisphosphatase isozyme 2 OS=… 2 MCF7_1 43.9
## 4 FBP2 Fructose-1,6-bisphosphatase isozyme 2 OS=… 2 MCF7_2 41.4
## 5 FBP2 Fructose-1,6-bisphosphatase isozyme 2 OS=… 2 MDA231_1 2.24
## 6 FBP2 Fructose-1,6-bisphosphatase isozyme 2 OS=… 2 MDA231_2 1.7
## make barplots facetted by Gene Symbol (when working with other data sets - change x=order to x = variable)
Example_plot_Up<-Examples_Up %>%
ggplot(aes(x=factor(variable,levels=c('MCF10A_1','MCF10A_2','MCF7_1','MCF7_2','MDA231_1','MDA231_2','MDA468_1','MDA468_2','SKBR3_1','SKBR3_2')),y=value))+
geom_bar(stat="identity",fill="royalblue")+
facet_wrap(~Gene_Symbol)+
theme_bw()+
theme(axis.text = element_text(colour = "black",size=10))+
theme(text = element_text(size=14))+
theme(axis.text.x = element_text(angle=45, hjust=1))+
labs(x="sample",y="relative intensity")
Example_plot_Up
