So, lets load RMarkdown
library(rmarkdown)
set messages to FALSE on everything (prevents certain boring things from being shown in the results)
knitr::opts_chunk$set(echo=TRUE,
message=FALSE,
warning=FALSE,
collapse=TRUE)
PACKAGES
library(reshape2)
library(ggplot2)
library(dplyr)
library(plotly)
library(viridis)
library(data.table)
library(pheatmap)
library(tidyverse)
library(ggthemes)
library(clipr)
library(tidyr)
COLORS (feel free to mess around and find your own favorites if you like)
mycolors<-c(viridis(15))
felix_cols<-mycolors[c(5,2)]
felix_4cols<-mycolors[c(15,10,8,2)]
plain_cols1<-c("blue","gray")
plain_cols2<-c("red","gray")
## Didn't know how to get the colours I wanted for my bar plots so I had to do it really weird
barcolour1<-c("firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick")
barcolour2<-c("firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick1","firebrick1","firebrick","firebrick","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick")
pats_cols<-colorRampPalette(c("#FDE725FF","white","#440154FF"))(21)
leos_cols<-colorRampPalette(c("white","blue"))(10)
LOAD DATA AND MAKE AN OVERALL HEATMAP
## Dataset import
C19_data<-read_csv(file="complex_I_inhibitors.csv")
## Log ratios of expression (treatment/control)
C19_data<-C19_data %>%
mutate(log_phenformin=log2((Phenformin_1+Phenformin_2)/(PBS_1+PBS_2)),
log_piericidin=log2((Piericidin_1+Piericidin_2)/(PBS_1+PBS_2)),
log_rotenone=log2((Rotenone_1+Rotenone_2)/(PBS_1+PBS_2)),
log_mpp=log2((MPP_1+MPP_2)/(PBS_1+PBS_2)))
arrange(C19_data, desc(Uniprot_Accession))
## # A tibble: 6,244 x 22
## Uniprot_Accession Gene_Symbol Protein_Description Peptides PBS_1 PBS_2
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 R4GN35 DENND4C R4GN35_HUMAN DENN domain-… 5 8.6 10.4
## 2 R4GMX3 COMMD3-BMI1 R4GMX3_HUMAN Protein COMM… 1 12.6 11.5
## 3 Q9Y6Y8 SEC23IP S23IP_HUMAN SEC23-interac… 22 9.3 8.6
## 4 Q9Y6Y0 IVNS1ABP NS1BP_HUMAN Influenza vir… 7 12.1 10.3
## 5 Q9Y6X9 MORC2 MORC2_HUMAN MORC family C… 16 11.8 11.6
## 6 Q9Y6X4 FAM169A F169A_HUMAN Soluble lamin… 1 9.9 12.3
## 7 Q9Y6X3 MAU2 SCC4_HUMAN MAU2 chromatid… 2 12.4 12.6
## 8 Q9Y6W5 WASF2 WASF2_HUMAN Wiskott-Aldri… 9 7.5 10.7
## 9 Q9Y6W3 CAPN7 CAN7_HUMAN Calpain-7 1 10.1 13.3
## 10 Q9Y6V7 DDX49 DDX49_HUMAN Probable ATP-… 8 11 13
## # … with 6,234 more rows, and 16 more variables: Phenformin_1 <dbl>,
## # Phenformin_2 <dbl>, Piericidin_1 <dbl>, Piericidin_2 <dbl>,
## # Rotenone_1 <dbl>, Rotenone_2 <dbl>, MPP_1 <dbl>, MPP_2 <dbl>,
## # pvalue_phenformin_vs_PBS <dbl>, pvalue_piericidin_vs_PBS <dbl>,
## # pvalue_rotenone_vs_PBS <dbl>, pvalue_MPP_vs_PBS <dbl>,
## # log_phenformin <dbl>, log_piericidin <dbl>, log_rotenone <dbl>,
## # log_mpp <dbl>
## Matrix
C19_mat1<-C19_data %>%
select(PBS_1:MPP_2) %>%
as.matrix() %>%
round(.,2)
## Heatmap from all treatments
pheatmap(C19_mat1,
color=pats_cols,
cellwidth=30,
cellheight=0.07,
cluster_cols=FALSE,
cluster_rows=FALSE,
legend=TRUE,
fontsize=7,
scale="row")

## Piericidin looks most different (visually), so from here on out focusing on piericidin
PLOTTING A VOLCANO
##BASIC VOLCANO PLOT
## -log10(p-values)
C19_data<-C19_data %>%
mutate(log_neg_p=-log(pvalue_piericidin_vs_PBS))
## Initial volcano plot (not final)
volcano<- C19_data %>%
ggplot(aes(x=log_piericidin,
y=log_neg_p,
description=Gene_Symbol)) +
geom_point(alpha=0.7,
colour="blue")
volcano

BETTER VOLCANO PLOT
## Define statistical significance for volcano plot
C19_data<-C19_data %>%
mutate(significant=ifelse((log_piericidin>2 & log_neg_p>2.99),
"UP",
ifelse((log_piericidin<c(-2) & log_neg_p>2.99),
"NEG",
"NOT SIG")))
## Plain colours for volcano plot
plain_cols3<-c("red", "gray", "blue")
## New volcano plot (with integrated colouring scheme by statistical significance)
volcano2<- C19_data %>%
ggplot(aes(x=log_piericidin,
y=log_neg_p,
description=Gene_Symbol,
colour=significant)) +
geom_point(alpha=0.7) +
scale_colour_manual(values=plain_cols3) +
xlim(-6,6) +
theme_bw() +
theme(axis.text=element_text(colour="black",
size=14)) +
theme(text=element_text(size=14)) +
labs(x="Log Ratio of Piericidin to Control Expression",
y="Negative Log of P-value")
## Visual inspection of volcano plot (ggplotly allows mousing over of points to show gene names)
ggplotly(volcano2)
EXAMPLES OF A COUPLE PROTEINS
## Melt proteins that significantly Increased
products_up<-C19_data %>%
filter(Gene_Symbol=="HSP90AB1" | Gene_Symbol=="DPY30" | Gene_Symbol=="RPS29" | Gene_Symbol=="RNASEH1" | Gene_Symbol=="TMEM179B" | Gene_Symbol=="NAT14" | Gene_Symbol=="SDF2" | Gene_Symbol=="NUDT5" | Gene_Symbol=="PFDN4")%>%
select(Gene_Symbol, PBS_1, PBS_2, Piericidin_1, Piericidin_2) %>%
melt()
## Barplot for proteins that significantly increased
products_up_plot<-products_up %>%
ggplot(aes(x=variable,
y=value)) +
geom_bar(stat="identity",
fill=barcolour2) +
facet_wrap(~Gene_Symbol) +
theme_bw() +
theme(axis.text=element_text(colour="black",
size=10)) +
theme(text=element_text(size=14)) +
theme(axis.text.x=element_text(angle=45,
hjust=1)) +
labs(x="Treatment",
y="Relative Protein Expression")
products_up_plot

## Melt proteins that significantly decreased
products_down<-C19_data %>%
filter(Gene_Symbol=="SEC61B" | Gene_Symbol=="ZNF579" | Gene_Symbol=="PTPLA" | Gene_Symbol=="IER3IP1" | Gene_Symbol=="MT-ND1" | Gene_Symbol=="MGST3" | Gene_Symbol=="TTYH3" | Gene_Symbol=="USP1" | Gene_Symbol=="CYB561")%>%
select(Gene_Symbol, PBS_1, PBS_2, Piericidin_1, Piericidin_2) %>%
melt()
## Barplot for proteins that significantly decreased
products_down_plot<-products_down %>%
ggplot(aes(x=variable,
y=value)) +
geom_bar(stat="identity",fill=barcolour1) +
facet_wrap(~Gene_Symbol) +
theme_bw() +
theme(axis.text=element_text(colour="black",
size=10)) +
theme(text=element_text(size=14)) +
theme(axis.text.x=element_text(angle=45,
hjust=1)) +
labs(x="Treatment",
y="Relative Protein Expression")
products_down_plot
