library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
dox_genes <- read.csv("1_Input_files/Dox_responsive_gene.csv")
files <- list.files("1_Input_files/",
pattern = ".tabular")
files
## [1] "cuff_wt_0_01.tabular" "cuff_wt_0_1.tabular" "cuff_wt_0_5.tabular"
for (i in files){
df1 <- read.csv(paste0("1_Input_files/",i),
sep = "\t")
df1 <- subset(df1, df1$gene != "-")
df1 <- separate(data = df1,
col = test_id,
into = c("left", "ID"),
sep = ":")
df1$left <- NULL
#Values in a not in b
df <- anti_join(df1, dox_genes, by='ID')
name <- paste0("Cuffdiff_",
unique(df$sample_1), "_",
unique(df$sample_2), ".csv")
write.table(df,
file= paste0("2_Processed_files/",name),
row.names = FALSE,
quote = FALSE, sep=",")
}
For scatter plots the solid black line is line of best fit. Blue dashed line is X=Y line and the number in the bottom right is the number of differentially expressed genes.
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(ggpmisc)
## Loading required package: ggpp
## Registered S3 methods overwritten by 'ggpp':
## method from
## heightDetails.titleGrob ggplot2
## widthDetails.titleGrob ggplot2
##
## Attaching package: 'ggpp'
## The following objects are masked from 'package:ggpubr':
##
## as_npc, as_npcx, as_npcy
## The following object is masked from 'package:ggplot2':
##
## annotate
files <- list.files("2_Processed_files/")
for (i in files){
f <- read.csv(paste0("2_Processed_files/",i),
sep = ",")
xlab <- unique(f$sample_1)
ylab <- unique(f$sample_2)
#Scatter Plot
sp <- ggscatter(f,
x = "value_1",
y = "value_2",
add = "reg.line", # Add regression line
add.params = list(color = "black",
fill = "lightgray"), # Customize reg. line
conf.int = TRUE,
xlab = xlab,
ylab = ylab) +
xlim(0,8000) +
ylim(0,8000) +
geom_point(aes(color = factor(significant))) +
geom_abline(intercept = 0,
slope = 1,
colour = "blue",
linetype = 8) +
stat_poly_eq(use_label(c("eq","R2","p"))) +
annotate(geom="text",
x=6500,
y=50,
label= paste0(
"#",
nrow(subset(f, f$significant == "yes"))),
color="black") +
ggtitle(paste0(ylab , " v/s ", xlab))
plot(sp)
#Volcano plot
temp <- f[,c(10,13,14)]
colnames(temp) <- c("FC", "Qvalue", "Sig")
temp$y_axis <- -log10(temp$Qvalue)
up <- subset(temp, temp$Sig == "yes" &
temp$FC > 0)
down <- subset(temp, temp$Sig == "yes" &
temp$FC < 0)
p <- ggplot(temp, aes(x=FC, y=y_axis)) +
geom_point(aes(color = factor(Sig)),size = 2) +
theme_classic() +
geom_hline(yintercept = -log10(0.05),
col = "gray", linetype = 'dashed') +
geom_vline(xintercept = c(0), col = "gray",
linetype = 'dashed') +
labs(
x = expression(Log[2] ~ "(Fold change)"),
y = expression(-Log[10] ~ "(Q-value)")
) +
ggtitle(
paste0(paste0(ylab , " v/s ", xlab),
"\n",
"Down Regulated= ", nrow(down), "\n",
"Up Regulated= " , nrow(up)
)
)
plot(p)
#Density Plot
temp <- subset(temp, temp$Sig == "yes")
d <- density(temp$FC)
plot(d, main=paste0("Log2FC at ",ylab), ylim = c(0,1.3),
xlim = c(-4,4))
polygon(d, col="red", border="blue")
}
## Warning: Removed 25 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 25 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 25 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 25 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 26 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 26 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 26 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 26 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_poly_eq()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Venn and UpSet plot
library(ggvenn)
## Loading required package: grid
library(UpSetR)
cols <- c("#a7c957ff", "#ffd60aff", "#ff8fabff")
f1 <- read.csv("2_Processed_files/Cuffdiff_WT_Dox_0.01.csv")
f2 <- read.csv("2_Processed_files/Cuffdiff_WT_Dox_0.1.csv")
f3 <- read.csv("2_Processed_files/Cuffdiff_WT_Dox_0.5.csv")
f1 <- subset(f1, f1$significant == "yes")
f2 <- subset(f2, f2$significant == "yes")
f3 <- subset(f3, f3$significant == "yes")
x <- list(
Dox_0.01 = f1$gene,
Dox_0.1 = f2$gene,
Dox_0.5 = f3$gene
)
p1 <- ggvenn(
x,
fill_color = cols,
stroke_size = 0.5, set_name_size = 4,
show_percentage = FALSE
)
plot(p1)
upset(fromList(x), order.by = "freq")
library(heatmap3)
library(pheatmap)
f1 <- read.csv("2_Processed_files/Cuffdiff_WT_Dox_0.01.csv")
f2 <- read.csv("2_Processed_files/Cuffdiff_WT_Dox_0.1.csv")
f3 <- read.csv("2_Processed_files/Cuffdiff_WT_Dox_0.5.csv")
a <- f1[,c(3,8,9)]
colnames(a) <- c("Gene","Control","Dox 0.01")
b <- f2[,c(3,9)]
colnames(b) <- c("Gene","Dox 0.1")
c <- f3[,c(3,9)]
colnames(c) <- c("Gene","Dox 0.5")
mer <- merge(a,b,by="Gene")
mer <- merge(mer,c,by="Gene")
row.names(mer) <- mer$Gene
mer$Gene <- NULL
mer <- mer[rowSums(mer == 0) == 0, ]
mer <- as.matrix(mer)
heatmap3(mer, Colv=NA, col = cm.colors(10))
pheatmap(mer, scale = "row",
cluster_cols = F,
show_rownames = F)