Load libraries

library(dplyr)
library(tidyr)
library(cowplot)
library(UpSetR)
library(gridExtra)
library(ggplot2)

Load and merge DEGs and statistics

A_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_A.csv", header =T)
B_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_B.csv", header =T)
C_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_C.csv", header =T)
D_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_D.csv", header =T)
E_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_E.csv", header =T)
F_Group <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/DEGs_F.csv", header =T)


Combine <- na.omit(rbind(A_Group,B_Group,C_Group,D_Group,E_Group,F_Group))
names(Combine)[8] = "GeneID"
head(Combine, 50)
### Filter differentially expressed genes based on following creteria
donw_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange < -1,]
donw_gene <- donw_gene %>% group_by(Sample) %>% mutate(Down = n())
donw_summary <- unique(donw_gene[,c("Sample","Down")])

up_gene <- Combine[Combine$padj < 0.05 & Combine$log2FoldChange > 1,]
up_gene <- up_gene %>% group_by(Sample) %>% mutate(up = n())
up_summary <- unique(up_gene[,c("Sample","up")])

combind_summary <- left_join(up_summary, donw_summary, by = "Sample")
head(combind_summary, 40) 

Plot line graph of DEGs at differene time

plot_summary <- combind_summary  %>% separate(Sample, c("Accession", "Time"))
plot_summary[is.na(plot_summary)] = 0
#write.csv(plot_summary, "/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/10_DEGs/pair-wise_comp.csv")
plot_summary
plot1 <- ggplot(data=plot_summary, aes(x=Time, y = up,color = Accession)) +
  geom_point(size = 1) +
  geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +theme_bw() +
  ylab("Numbers of up-regulated genes") 
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
  xlab("Time Point") +
  theme(legend.position="none")
## NULL
plot2 <- ggplot(data=plot_summary, aes(x=Time, y = Down,color = Accession)) +
  geom_point(size = 1) +
  geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +theme_bw() +
  ylab("Numbers of Down-regulated genes") +
  xlab("Time Point")

plot_grid(plot1, plot2, labels = c('A', 'B'), label_size = 12)

Build unique list of all samples and unique DEGs

### Generate the non-duplicated DEGs list
Down_list <- unique(data.frame(GeneID = donw_gene[,"GeneID"]))
Up_list <- unique(data.frame(GeneID = up_gene[,"GeneID"]))
head(Down_list,20)
head(Up_list,20)
### Check sample list
sample_list <- unique(data.frame(Sample = Combine[,"Sample"]))
sample_list

Build dataframe for down regulated genes

####for down-regulated DEGs
for (i in 1:nrow(sample_list)){
  df <- left_join(Down_list,donw_gene[donw_gene$Sample == sample_list[i,1],c("GeneID","log2FoldChange")],by = "GeneID")
  df <- as.data.frame(df[,2])
  colnames(df) <- sample_list[i,1]
  assign(paste0("DEGs_down_",sample_list[i,1]),df)
}
#Combined all dfs
merge_list1 <- lapply(ls(pattern = "DEGs_down_"), get)
down_combine <- bind_cols(Down_list ,merge_list1)

#Replace NA into 0
down_combine[is.na(down_combine)] <- 0
down_combine_upset <- down_combine

#Replace all values not equal with 1
for (i in 2:ncol(down_combine)){
down_combine_upset[,i] <-  replace(x = down_combine_upset[,i], 
                   down_combine_upset[,i] != 0.000000   , 
                   values =  1)
}

### Check the logFC in a combined table
head(down_combine, 40)
write.csv(down_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/Down.csv")

head(down_combine_upset, 40)

Plot down regulated genes in upset plot

###Plot by time point 
upset(down_combine_upset[,c("A_TP1","A_TP3","A_TP5","A_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP1","A_TP3","A_TP5","A_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(down_combine_upset[,c("E_TP1","E_TP3","E_TP5","E_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("E_TP1","E_TP3","E_TP5","E_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(down_combine_upset[,c("C_TP1","C_TP3","C_TP5","C_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("C_TP1","C_TP3","C_TP5","C_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(down_combine_upset[,c("B_TP1","B_TP3","B_TP5","B_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("B_TP1","B_TP3","B_TP5","B_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(down_combine_upset[,c("GeneID","D_TP1","D_TP3","D_TP5","D_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("D_TP1","D_TP3","D_TP5","D_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

#write.csv(down_combine_upset[,c("GeneID","D_TP1","D_TP3","D_TP5","D_TP7")], "/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/10_DEGs/test.csv")


upset(down_combine_upset[,c("GeneID","F_TP1","F_TP3","F_TP5","F_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("F_TP1","F_TP3","F_TP5","F_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

###Plot by accessions
upset(down_combine_upset[,c("GeneID","A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)

upset(down_combine_upset[,c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)

upset(down_combine_upset[,c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)

upset(down_combine_upset[,c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)

Build dataframe for up regulated genes

####for down-regulated DEGs
for (i in 1:nrow(sample_list)){
  df <- left_join(Up_list,up_gene[up_gene$Sample == sample_list[i,1],c("GeneID","log2FoldChange")],by = "GeneID")
  df <- as.data.frame(df[,2])
  colnames(df) <- sample_list[i,1]
  assign(paste0("DEGs_up_",sample_list[i,1]),df)
}
#Combined all dfs
merge_list2 <- lapply(ls(pattern = "DEGs_up_"), get)
Up_combine <- bind_cols(Up_list ,merge_list2)

#Replace NA into 0
Up_combine[is.na(Up_combine)] <- 0
Up_combine_upset <- Up_combine

#Replace all values not equal with 1
for (i in 2:ncol(Up_combine)){
Up_combine_upset[,i] <-  replace(x = Up_combine_upset[,i], 
                   Up_combine_upset[,i] != 0.000000 , 
                   values =  1)
}


head(Up_combine, 40)
write.csv(down_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/Down.csv")

head(Up_combine_upset, 40)
#write.csv(Up_combine,"/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/10_DEGs/Up.csv")

Plot Up-regulated genes in upset plot

###Plot by time point 
upset(Up_combine_upset[,c("A_TP1","A_TP3","A_TP5","A_TP7")], 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP1","A_TP3","A_TP5","A_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(Up_combine_upset[,c("E_TP1","E_TP3","E_TP5","E_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("E_TP1","E_TP3","E_TP5","E_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(Up_combine_upset[,c("C_TP1","C_TP3","C_TP5","C_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("C_TP1","C_TP3","C_TP5","C_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(Up_combine_upset[,c("B_TP1","B_TP3","B_TP5","B_TP7")], 
      nintersects = 5,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("B_TP1","B_TP3","B_TP5","B_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(Up_combine_upset[,c("D_TP1","D_TP3","D_TP5","D_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("D_TP1","D_TP3","D_TP5","D_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

upset(Up_combine_upset[,c("F_TP1","F_TP3","F_TP5","F_TP7")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("F_TP1","F_TP3","F_TP5","F_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 3,
      matrix.color = "steelblue",
      sets.bar.color = "grey"
)

###Plot by accessions
upset(Up_combine_upset[,c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP1","B_TP1","C_TP1","D_TP1","E_TP1","F_TP1"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 2,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)

upset(Up_combine_upset[,c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP3","B_TP3","C_TP3","D_TP3","E_TP3","F_TP3"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 2,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)

upset(Up_combine_upset[,c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5")], 
      nintersects = 200,
      nsets = 4, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP5","B_TP5","C_TP5","D_TP5","E_TP5","F_TP5"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 2,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)

upset(Up_combine_upset[,c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7")], 
      nintersects = 200,
      nsets = 6, 
      order.by = c("freq", "degree"), 
      decreasing = c(FALSE,TRUE),
      sets = c("A_TP7","B_TP7","C_TP7","D_TP7","E_TP7","F_TP7"),
      keep.order = T,
      mb.ratio = c(0.5, 0.5),
      number.angles = 0, 
      text.scale = 2, 
      point.size = 4, 
      line.size = 2,
      matrix.color = "steelblue",
      sets.bar.color = "tomato"
)