library(dplyr)
library(tidyr)
library(cowplot)
library(UpSetR)
library(gridExtra)
library(ggplot2)
A_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_A.csv", header =T)
B_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_B.csv", header =T)
C_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_C.csv", header =T)
D_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_D.csv", header =T)
E_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_E.csv", header =T)
F_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_F.csv", header =T)
Combine <- na.omit(rbind(A_Group,B_Group,C_Group,D_Group,E_Group,F_Group))
names(Combine)[8] = "GeneID"
head(Combine, 50)
### Filter differentially expressed genes based on following creteria
donw_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange < -1,]
donw_gene <- donw_gene %>% group_by(Sample) %>% mutate(Down = n())
donw_summary <- unique(donw_gene[,c("Sample","Down")])
up_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange > 1,]
up_gene <- up_gene %>% group_by(Sample) %>% mutate(up = n())
up_summary <- unique(up_gene[,c("Sample","up")])
combind_summary <- left_join(up_summary, donw_summary, by = "Sample")
head(combind_summary, 40)
plot_summary <- combind_summary %>% separate(Sample, c("Accession", "Time"))
plot_summary[is.na(plot_summary)] = 0
#write.csv(plot_summary, "/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/10_DEGs/pair-wise_comp.csv")
plot_summary
plot1 <- ggplot(data=plot_summary, aes(x=Time, y = up,color = Accession)) +
geom_point(size = 1) +
geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +theme_bw() +
ylab("Numbers of up-regulated genes")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
xlab("Time Point") +
theme(legend.position="none")
## NULL
plot2 <- ggplot(data=plot_summary, aes(x=Time, y = Down,color = Accession)) +
geom_point(size = 1) +
geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +theme_bw() +
ylab("Numbers of Down-regulated genes") +
xlab("Time Point")
plot_grid(plot1, plot2, labels = c('A', 'B'), label_size = 12)
### Generate the non-duplicated DEGs list
Down_list <- unique(data.frame(GeneID = donw_gene[,"GeneID"]))
Up_list <- unique(data.frame(GeneID = up_gene[,"GeneID"]))
head(Down_list,20)
head(Up_list,20)
### Check sample list
sample_list <- unique(data.frame(Sample = Combine[,"Sample"]))
sample_list
####for down-regulated DEGs
for (i in 1:nrow(sample_list)){
df <- left_join(Down_list,donw_gene[donw_gene$Sample == sample_list[i,1],c("GeneID","log2FoldChange")],by = "GeneID")
df <- as.data.frame(df[,2])
colnames(df) <- sample_list[i,1]
assign(paste0("DEGs_down_",sample_list[i,1]),df)
}
#Combined all dfs
merge_list1 <- lapply(ls(pattern = "DEGs_down_"), get)
down_combine <- bind_cols(Down_list ,merge_list1)
#Replace NA into 0
down_combine[is.na(down_combine)] <- 0
down_combine_upset <- down_combine
#Replace all values not equal with 1
for (i in 2:ncol(down_combine)){
down_combine_upset[,i] <- replace(x = down_combine_upset[,i],
down_combine_upset[,i] != 0.000000 ,
values = 1)
}
### Check the logFC in a combined table
head(down_combine, 40)
write.csv(down_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/Down.csv")
head(down_combine_upset, 40)
###Plot by time point
upset(down_combine_upset[,c("A_TP1","A_TP3","A_TP5","A_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP1","A_TP3","A_TP5","A_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(down_combine_upset[,c("E_TP1","E_TP3","E_TP5","E_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("E_TP1","E_TP3","E_TP5","E_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(down_combine_upset[,c("C_TP1","C_TP3","C_TP5","C_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("C_TP1","C_TP3","C_TP5","C_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(down_combine_upset[,c("B_TP1","B_TP3","B_TP5","B_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("B_TP1","B_TP3","B_TP5","B_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(down_combine_upset[,c("GeneID","D_TP1","D_TP3","D_TP5","D_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("D_TP1","D_TP3","D_TP5","D_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
#write.csv(down_combine_upset[,c("GeneID","D_TP1","D_TP3","D_TP5","D_TP7")], "/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/10_DEGs/test.csv")
upset(down_combine_upset[,c("GeneID","F_TP1","F_TP3","F_TP5","F_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("F_TP1","F_TP3","F_TP5","F_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
###Plot by accessions
upset(down_combine_upset[,c("GeneID","A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(down_combine_upset[,c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(down_combine_upset[,c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(down_combine_upset[,c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
####for down-regulated DEGs
for (i in 1:nrow(sample_list)){
df <- left_join(Up_list,up_gene[up_gene$Sample == sample_list[i,1],c("GeneID","log2FoldChange")],by = "GeneID")
df <- as.data.frame(df[,2])
colnames(df) <- sample_list[i,1]
assign(paste0("DEGs_up_",sample_list[i,1]),df)
}
#Combined all dfs
merge_list2 <- lapply(ls(pattern = "DEGs_up_"), get)
Up_combine <- bind_cols(Up_list ,merge_list2)
#Replace NA into 0
Up_combine[is.na(Up_combine)] <- 0
Up_combine_upset <- Up_combine
#Replace all values not equal with 1
for (i in 2:ncol(Up_combine)){
Up_combine_upset[,i] <- replace(x = Up_combine_upset[,i],
Up_combine_upset[,i] != 0.000000 ,
values = 1)
}
head(Up_combine, 40)
write.csv(down_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/Down.csv")
head(Up_combine_upset, 40)
#write.csv(Up_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/10_DEGs/Up.csv")
###Plot by time point
upset(Up_combine_upset[,c("A_TP1","A_TP3","A_TP5","A_TP7")],
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP1","A_TP3","A_TP5","A_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(Up_combine_upset[,c("E_TP1","E_TP3","E_TP5","E_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("E_TP1","E_TP3","E_TP5","E_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(Up_combine_upset[,c("C_TP1","C_TP3","C_TP5","C_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("C_TP1","C_TP3","C_TP5","C_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(Up_combine_upset[,c("B_TP1","B_TP3","B_TP5","B_TP7")],
nintersects = 5,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("B_TP1","B_TP3","B_TP5","B_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(Up_combine_upset[,c("D_TP1","D_TP3","D_TP5","D_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("D_TP1","D_TP3","D_TP5","D_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
upset(Up_combine_upset[,c("F_TP1","F_TP3","F_TP5","F_TP7")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("F_TP1","F_TP3","F_TP5","F_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 3,
matrix.color = "steelblue",
sets.bar.color = "grey"
)
###Plot by accessions
upset(Up_combine_upset[,c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 2,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(Up_combine_upset[,c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 2,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(Up_combine_upset[,c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5")],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 2,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)
upset(Up_combine_upset[,c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7")],
nintersects = 200,
nsets = 6,
order.by = c("freq", "degree"),
decreasing = c(FALSE,TRUE),
sets = c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7"),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 2,
point.size = 4,
line.size = 2,
matrix.color = "steelblue",
sets.bar.color = "tomato"
)