####Run through this example and try to understand what is going on with the data
####So, lets load RMarkdown
library(rmarkdown)
knitr::opts_chunk$set(echo = TRUE, message=FALSE,warning=FALSE,collapse = TRUE)
library(reshape2)
library(ggplot2)
library(dplyr)
library(plotly)
library(viridis)
library(data.table)
library(pheatmap)
library(tidyverse)
library(ggthemes)
library(clipr)
library(tidyr)
library(Rcpp)
mycolors<-c(viridis(15))
felix_cols<-mycolors[c(5,2)]
felix_4cols<-mycolors[c(15,10,8,2)]
plain_cols1<-c("blue","gray")
plain_cols2<-c("red","gray")
pats_cols<-colorRampPalette(c("#FDE725FF", "white","#440154FF"))(21)
leos_cols<-colorRampPalette(c("white","blue"))(10)
## load the dataset (cells treated with COVID)
C19_data<-read_csv(file="COVID_test.csv")
## make some new columns that store the log ratios
C19_data<-C19_data %>% mutate(
log_2h=log2((Virus_2h_1+Virus_2h_2)/(Control_2h_1+Control_2h_2)),
log_6h=log2((Virus_6h_1+Virus_6h_2)/(Control_2h_1+Control_2h_2)),
log_10h=log2((Virus_10h_1+Virus_10h_2)/(Control_2h_1+Control_2h_2)),
log_24h=log2((Virus_24h_1+Virus_24h_2)/(Control_2h_1+Control_2h_2)))
view(C19_data)
## make a matrix of just log values the data
C19_mat1<-C19_data %>% select(log_2h:log_24h) %>% as.matrix() %>% round(.,2)
head(C19_mat1)
## log_2h log_6h log_10h log_24h
## [1,] 0.14 0.71 2.69 6.05
## [2,] -0.01 1.12 3.13 6.04
## [3,] 0.02 0.60 1.76 4.86
## [4,] 0.00 0.84 2.63 4.82
## [5,] 0.03 0.31 1.57 4.73
## [6,] 0.02 1.47 3.53 4.71
## make a heatmap from the data
pheatmap(C19_mat1, color=pats_cols,cellwidth=30,cellheight=.03,cluster_cols=FALSE,cluster_rows=FALSE,legend=TRUE,fontsize = 7,scale="column")
#looks like 24hrs is different than the other cells lines. Let's take a closer look at that
##BASIC VOLCANO PLOT
## Add a column that stores the negative log of the pvalue of interest (in this case, Virus_24h)
C19_data<-C19_data %>% mutate(neglog_24h=-log10(P_value_24h))
## Use ggplot to plot the log ratio at 24 hours against the -log p-value at 24 hours
volcano_plot<-C19_data %>% ggplot(aes(x=log_24h,y=neglog_24h,description=Gene_Symbol))+
geom_point(alpha=0.7,color="royalblue")
volcano_plot
## BETTER VOLCANO PLOT
## Define the significant ones (by using ifelse and setting # parameters) so they can be colored
C19_data<-C19_data %>% mutate(significance=ifelse((log_24h>2 & neglog_24h>2.99),"UP", ifelse((log_24h<c(-2) & neglog_24h>2.99),"DOWN","NOT SIG")))
## Some standard colors
plain_cols3<-c("red","gray","blue")
## volcano plot as before with some added things
better_volcano_plot<-C19_data %>% ggplot(aes(x=log_24h,y=neglog_24h,description=Gene_Symbol,color=significance))+
geom_point(alpha=0.7)+
scale_color_manual(values=plain_cols3)+
xlim(-6,6)+
theme_bw()+
theme(axis.text = element_text(colour = "black",size=14))+
theme(text = element_text(size=14))+
labs(x="log ratio at 24 hrs COVID-19 infection compared to control",y="-log(p-value)")
better_volcano_plot
## use ggplotly to see hover over the points to see the gene names. Record these for the next step
ggplotly(better_volcano_plot)
## Significant up regulated are: ___, ___ and _____
## Significant down regulated are: _____. _____ and ____
# Make a pivot longer table of the C19 data for Control_2h_1:Virus_24h_2
C19_long<-pivot_longer(C19_data, cols = c(Control_2h_1:Virus_24h_2), names_to = 'variable')%>% select(-c(P_value_2h:significance))%>%select(-Accession)
head(C19_long)
## # A tibble: 6 × 3
## Gene_Symbol variable value
## <chr> <chr> <dbl>
## 1 Viral_Protein_1 Control_2h_1 0.53
## 2 Viral_Protein_1 Control_2h_2 0.78
## 3 Viral_Protein_1 Virus_2h_1 0.6
## 4 Viral_Protein_1 Virus_2h_2 0.84
## 5 Viral_Protein_1 Virus_6h_1 0.64
## 6 Viral_Protein_1 Virus_6h_2 1.51
## Because ggplot will automatically plot alphabetically on the x-axis, we need to make sure that our timepoints will be in the proper order by adding a new column using the factor() function
#this should be excluded in other datasets where order is not needed
C19_long$order <- as.character(C19_long$variable)
C19_long$order <- factor(C19_long$order,levels=c('Control_2h_1','Control_2h_2','Virus_2h_1','Virus_2h_2','Virus_6h_1','Virus_6h_2','Virus_10h_1','Virus_10h_2','Virus_24h_1','Virus_24h_2'))
## based on the gene symbols from plotly, select a few proteins from the table. Select just the data and gene symbols and pivot longer
Examples_Down<-C19_long %>% filter(Gene_Symbol=="SHH" | Gene_Symbol=="APLP2;APP" | Gene_Symbol=="TSPYL1")
head(Examples_Down)
## # A tibble: 6 × 4
## Gene_Symbol variable value order
## <chr> <chr> <dbl> <fct>
## 1 TSPYL1 Control_2h_1 14.5 Control_2h_1
## 2 TSPYL1 Control_2h_2 13.4 Control_2h_2
## 3 TSPYL1 Virus_2h_1 11.4 Virus_2h_1
## 4 TSPYL1 Virus_2h_2 13.1 Virus_2h_2
## 5 TSPYL1 Virus_6h_1 10.9 Virus_6h_1
## 6 TSPYL1 Virus_6h_2 12.2 Virus_6h_2
## make barplots facetted by Gene Symbol (when working with other data sets - change x=order to x = variable)
Example_plot_down<-Examples_Down %>%
ggplot(aes(x=variable,y=value))+
geom_bar(stat="identity",fill="red")+
facet_wrap(~Gene_Symbol)+
theme_bw()+
theme(axis.text = element_text(colour = "black",size=10))+
theme(text = element_text(size=14))+
theme(axis.text.x = element_text(angle=45, hjust=1))+
labs(x="sample",y="relative intensity")
Example_plot_down
## Same process for the upregulated ones
Examples_Up<-C19_long %>% filter(Gene_Symbol=="Viral_Protein_4" | Gene_Symbol=="Viral_Protein_8")
head(Examples_Up)
## # A tibble: 6 × 4
## Gene_Symbol variable value order
## <chr> <chr> <dbl> <fct>
## 1 Viral_Protein_4 Control_2h_1 1.21 Control_2h_1
## 2 Viral_Protein_4 Control_2h_2 1.4 Control_2h_2
## 3 Viral_Protein_4 Virus_2h_1 1.22 Virus_2h_1
## 4 Viral_Protein_4 Virus_2h_2 1.39 Virus_2h_2
## 5 Viral_Protein_4 Virus_6h_1 2.14 Virus_6h_1
## 6 Viral_Protein_4 Virus_6h_2 2.54 Virus_6h_2
Example_plot_up<-Examples_Up %>% ggplot(aes(x=variable,y=value))+
geom_bar(stat="identity",fill="royalblue")+
facet_wrap(~Gene_Symbol)+
theme_bw()+
theme(axis.text = element_text(colour = "black",size=10))+
theme(text = element_text(size=14))+
theme(axis.text.x = element_text(angle=45, hjust=1))+
labs(x="sample",y="relative intensity")
Example_plot_up
##now you can knit this and publish to save and share your code. Use this to work with either the brain or breast cells and the Part_C_template to complete your lab 6 ELN. ##Annotate when you have trouble and reference which line of code you need help on ## good luck and have fun!