So, lets load RMarkdown

library(rmarkdown)

set messages to FALSE on everything (prevents certain boring things from being shown in the results)

knitr::opts_chunk$set(echo=TRUE,
                      message=FALSE,
                      warning=FALSE,
                      collapse=TRUE)

PACKAGES

library(reshape2)
library(ggplot2)
library(dplyr)
library(plotly)
library(viridis)
library(data.table)
library(pheatmap)
library(tidyverse)
library(ggthemes)
library(clipr)
library(tidyr)

COLORS (feel free to mess around and find your own favorites if you like)

mycolors<-c(viridis(15))
felix_cols<-mycolors[c(5,2)]
felix_4cols<-mycolors[c(15,10,8,2)]
plain_cols1<-c("blue","gray")
plain_cols2<-c("red","gray")


## Didn't know how to get the colours I wanted for my bar plots so I had to do it really weird
barcolour1<-c("firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick")

barcolour2<-c("firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick1","firebrick1","firebrick","firebrick","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick","firebrick1","firebrick1","firebrick","firebrick")

pats_cols<-colorRampPalette(c("#FDE725FF","white","#440154FF"))(21)
leos_cols<-colorRampPalette(c("white","blue"))(10)

LOAD DATA AND MAKE AN OVERALL HEATMAP


## Dataset import
C19_data<-read_csv(file="complex_I_inhibitors.csv")

## Log ratios of expression (treatment/control)
C19_data<-C19_data %>%
  mutate(log_phenformin=log2((Phenformin_1+Phenformin_2)/(PBS_1+PBS_2)),
                              log_piericidin=log2((Piericidin_1+Piericidin_2)/(PBS_1+PBS_2)),
                              log_rotenone=log2((Rotenone_1+Rotenone_2)/(PBS_1+PBS_2)),
                              log_mpp=log2((MPP_1+MPP_2)/(PBS_1+PBS_2)))
arrange(C19_data, desc(Uniprot_Accession))
## # A tibble: 6,244 x 22
##    Uniprot_Accession Gene_Symbol Protein_Description        Peptides PBS_1 PBS_2
##    <chr>             <chr>       <chr>                         <dbl> <dbl> <dbl>
##  1 R4GN35            DENND4C     R4GN35_HUMAN DENN domain-…        5   8.6  10.4
##  2 R4GMX3            COMMD3-BMI1 R4GMX3_HUMAN Protein COMM…        1  12.6  11.5
##  3 Q9Y6Y8            SEC23IP     S23IP_HUMAN SEC23-interac…       22   9.3   8.6
##  4 Q9Y6Y0            IVNS1ABP    NS1BP_HUMAN Influenza vir…        7  12.1  10.3
##  5 Q9Y6X9            MORC2       MORC2_HUMAN MORC family C…       16  11.8  11.6
##  6 Q9Y6X4            FAM169A     F169A_HUMAN Soluble lamin…        1   9.9  12.3
##  7 Q9Y6X3            MAU2        SCC4_HUMAN MAU2 chromatid…        2  12.4  12.6
##  8 Q9Y6W5            WASF2       WASF2_HUMAN Wiskott-Aldri…        9   7.5  10.7
##  9 Q9Y6W3            CAPN7       CAN7_HUMAN Calpain-7              1  10.1  13.3
## 10 Q9Y6V7            DDX49       DDX49_HUMAN Probable ATP-…        8  11    13  
## # … with 6,234 more rows, and 16 more variables: Phenformin_1 <dbl>,
## #   Phenformin_2 <dbl>, Piericidin_1 <dbl>, Piericidin_2 <dbl>,
## #   Rotenone_1 <dbl>, Rotenone_2 <dbl>, MPP_1 <dbl>, MPP_2 <dbl>,
## #   pvalue_phenformin_vs_PBS <dbl>, pvalue_piericidin_vs_PBS <dbl>,
## #   pvalue_rotenone_vs_PBS <dbl>, pvalue_MPP_vs_PBS <dbl>,
## #   log_phenformin <dbl>, log_piericidin <dbl>, log_rotenone <dbl>,
## #   log_mpp <dbl>

## Matrix

C19_mat1<-C19_data %>%
  select(PBS_1:MPP_2) %>%
  as.matrix() %>%
  round(.,2)

## Heatmap from all treatments

pheatmap(C19_mat1,
         color=pats_cols,
         cellwidth=30,
         cellheight=0.07,
         cluster_cols=FALSE,
         cluster_rows=FALSE,
         legend=TRUE,
         fontsize=7,
         scale="row")

## Piericidin looks most different (visually), so from here on out focusing on piericidin

PLOTTING A VOLCANO

##BASIC VOLCANO PLOT

## -log10(p-values)

C19_data<-C19_data %>%
  mutate(log_neg_p=-log(pvalue_piericidin_vs_PBS))

## Initial volcano plot (not final)

volcano<- C19_data %>%
  ggplot(aes(x=log_piericidin,
             y=log_neg_p,
             description=Gene_Symbol)) +
  geom_point(alpha=0.7,
             colour="blue")
volcano

BETTER VOLCANO PLOT


## Define statistical significance for volcano plot

C19_data<-C19_data %>%
  mutate(significant=ifelse((log_piericidin>2 & log_neg_p>2.99),
                                                 "UP",
                                                  ifelse((log_piericidin<c(-2) & log_neg_p>2.99),
                                                         "NEG",
                                                         "NOT SIG")))



## Plain colours for volcano plot

plain_cols3<-c("red", "gray", "blue")

## New volcano plot (with integrated colouring scheme by statistical significance)

volcano2<- C19_data %>%
  ggplot(aes(x=log_piericidin,
             y=log_neg_p,
             description=Gene_Symbol,
             colour=significant)) +
  geom_point(alpha=0.7) +
  scale_colour_manual(values=plain_cols3) +
  xlim(-6,6) +
  theme_bw() +
  theme(axis.text=element_text(colour="black",
                               size=14)) +
  theme(text=element_text(size=14)) +
  labs(x="Log Ratio of Piericidin to Control Expression",
       y="Negative Log of P-value")


## Visual inspection of volcano plot (ggplotly allows mousing over of points to show gene names)

ggplotly(volcano2)

EXAMPLES OF A COUPLE PROTEINS




## Melt proteins that significantly Increased

products_up<-C19_data %>%
  filter(Gene_Symbol=="HSP90AB1" | Gene_Symbol=="DPY30" | Gene_Symbol=="RPS29" | Gene_Symbol=="RNASEH1" | Gene_Symbol=="TMEM179B" | Gene_Symbol=="NAT14" | Gene_Symbol=="SDF2" | Gene_Symbol=="NUDT5" | Gene_Symbol=="PFDN4")%>%
  select(Gene_Symbol, PBS_1, PBS_2, Piericidin_1, Piericidin_2) %>%
  melt()

## Barplot for proteins that significantly increased

products_up_plot<-products_up %>%
  ggplot(aes(x=variable,
             y=value)) +
  geom_bar(stat="identity",
           fill=barcolour2) +
  facet_wrap(~Gene_Symbol) +
  theme_bw() +
  theme(axis.text=element_text(colour="black",
                               size=10)) +
  theme(text=element_text(size=14)) +
  theme(axis.text.x=element_text(angle=45,
                                 hjust=1)) +
  labs(x="Treatment",
       y="Relative Protein Expression")
products_up_plot


## Melt proteins that significantly decreased

products_down<-C19_data %>%
  filter(Gene_Symbol=="SEC61B" | Gene_Symbol=="ZNF579" | Gene_Symbol=="PTPLA" | Gene_Symbol=="IER3IP1" | Gene_Symbol=="MT-ND1" | Gene_Symbol=="MGST3" | Gene_Symbol=="TTYH3" | Gene_Symbol=="USP1" | Gene_Symbol=="CYB561")%>%
  select(Gene_Symbol, PBS_1, PBS_2, Piericidin_1, Piericidin_2) %>%
  melt()

## Barplot for proteins that significantly decreased

products_down_plot<-products_down %>%
  ggplot(aes(x=variable,
             y=value)) +
  geom_bar(stat="identity",fill=barcolour1) +
  facet_wrap(~Gene_Symbol) +
  theme_bw() +
  theme(axis.text=element_text(colour="black",
                               size=10)) +
  theme(text=element_text(size=14)) +
  theme(axis.text.x=element_text(angle=45,
                                 hjust=1)) +
  labs(x="Treatment",
       y="Relative Protein Expression")
products_down_plot