library(dplyr)
library(tidyr)
library(ggplot2)
library(ggVennDiagram)
library(ggvenn)
library(ggpubr)
library(UpSetR)
library(pheatmap)
library(corrplot)
library(stringr)
library(stringr)
library(zFPKM)
library(RColorBrewer)
library(viridis)
library(matrixStats)
library(ggplot2)
library(ggpubr)
library(Hmisc)
library(patchwork)
library(gridExtra)
library(reshape2)
RMF: Relative modification frequency
RMA: Relative modification abundance
RML: Relative modification length-normalized
frequency
### load meta information
setwd("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/11_MODs_depth/data/")
merge_DP.list <- read.table("sample.list")
colnames(merge_DP.list) <- "depth_ID"
### generate the additional column
merge_DP.list$H_correlation <- "fill"
merge_DP.list$H_Pvalue <- "fill"
merge_DP.list$M_correlation <- "fill"
merge_DP.list$M_Pvalue <- "fill"
head(merge_DP.list)
### define the correlation extraction function
flattenCorrMatrix <- function(cormat, pmat) {
ut <- upper.tri(cormat)
data.frame(
row = rownames(cormat)[row(cormat)[ut]],
column = rownames(cormat)[col(cormat)[ut]],
cor =(cormat)[ut],
p = pmat[ut]
)
}
### define function
fun_length <- function(x){
return(data.frame(y=median(x),label= paste0("", length(x))))
}
### load each depth samples
for (i in 1:nrow(merge_DP.list)){
DP.df <- unique(read.table(paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/11_MODs_depth/data/",merge_DP.list[i,1]), header = F))
colnames(DP.df) <- c("Chr", "pos1", "pos2", "Index", "Nucleotides",
"M_ratio1", "M_ratio2", "H_nonref1", "H_ref1", "H_total1",
"H_nonref2", "H_ref2", "H_total2", "H_mods1", "H_mod2")
### calculate the HAMR retio
DP.df$H_ratio1 <- DP.df$H_nonref1/DP.df$H_total1
DP.df$H_ratio2 <- DP.df$H_nonref2/DP.df$H_total2
DP.df$M_ratio <- (DP.df$M_ratio1 + DP.df$M_ratio2)/2
DP.df$H_ratio <- (DP.df$H_ratio1 + DP.df$H_ratio2)/2
### Plot the pearson correlation
#corrplot.mixed(cor(DP.df[,c("H_ratio1","H_ratio2","M_ratio1", "M_ratio2")]), order = 'AOE')
### Take the HAMR for scatter plot
Plot.DP1 <- DP.df[,c("H_ratio1","H_ratio2")]
res <- rcorr(Plot.DP1[,1], Plot.DP1[,2], type=c("pearson"))
merge_DP.list[i,2:3] <- flattenCorrMatrix(res$r, res$P)[,3:4]
Plot.DP2 <- DP.df[,c("M_ratio1","M_ratio2")]
res <- rcorr(Plot.DP2[,1], Plot.DP2[,2], type=c("pearson"))
merge_DP.list[i,4:5] <- flattenCorrMatrix(res$r, res$P)[,3:4]
#print(ggscatter(Plot.DP1, x = "H_ratio1", y = "H_ratio2",
#add = "reg.line", conf.int = TRUE,
#cor.coef = TRUE, cor.method = "pearson",
#xlab = "HAMR_ratio1", ylab = "HAMR_ratio2",
#color = "black", shape = 21, size = 2,
#add.params = list(color = "blue", fill = "lightgray"),
#title= paste0("Ratio correlation of ",merge_DP.list[i,1])))
assign(merge_DP.list[i,1], DP.df)
}
temp1 <- merge_DP.list[,c(1,2)]
temp1$type <- "HAMR"
colnames(temp1) <- c("depth_ID", "correlation", "type")
head(temp1, 30)
temp2 <- merge_DP.list[,c(1,4)]
temp2$type <- "Modtect"
colnames(temp2) <- c("depth_ID", "correlation", "type")
head(temp2, 30)
Plot_bind <-rbind(temp1, temp2)
Plot_bind$correlation <- as.numeric(Plot_bind$correlation)
head(Plot_bind, 40)
###Plot the combined correlation
#ggplot(Plot_bind, aes(x=type, y=correlation, color = type)) +
#geom_boxplot() + theme_bw() +
#geom_jitter() +
#ylab("Pearson-correlation of modified ratio between two replicates")
comp.df <- data.frame("depth_ID" = merge_DP.list[,1])
### Calculate the WL-WW ratio changes using log2Ratio
comp.df <- data.frame("WL_sample" = comp.df[grepl("WL",comp.df$depth_ID),],
"WW_sample" = comp.df[grepl("WW",comp.df$depth_ID),])
comp.df$comp_set <- str_replace(comp.df[,1], "WL_", "")
comp.df$comp_set <- str_replace(comp.df$comp_set, ".merge.depth.mods", "_comp_ratio")
head(comp.df, 50)
### load the TPM and logFC data for comparison
TPM_mean <- read.csv("//Users/leon/OneDrive - Cornell University/Projects/9_Sorghum/3_FeatureCounts/Sorghum_TPM_mean.20221025.csv", row.names = 1)
head(TPM_mean, 20)
### Load the logFC data for comparison
Gene_logFC <- read.csv("/Users/leon/OneDrive - Cornell University/Projects/9_Sorghum/4_DEGs/All_DEGs.csv")
Gene_logFC <- Gene_logFC %>% separate("Sample", c("Accession", "Time"))
head(Gene_logFC, 30)
### combine the ratio for each
for (i in 1:nrow(comp.df)){
df1 <- get(comp.df[i,1])
df1 <- df1[,c("Index", "Nucleotides","H_ratio1", "H_ratio2","H_ratio","H_total1","H_total2")]
colnames(df1) <- c("Index", "Nucleotids_WL","H_WL_ratio1", "H_WL_ratio2","H_ratio_WL","H_total1_WL","H_total2_WL")
df2 <- get(comp.df[i,2])
df2 <- df2[,c("Index", "Nucleotides","H_ratio1","H_ratio2","H_ratio","H_total1","H_total2")]
colnames(df2) <- c("Index", "Nucleotids_WW","H_WW_ratio1", "H_WW_ratio2","H_ratio_WW","H_total1_WW","H_total2_WW")
### Perform the t.test and logFC calculation (common PTMs)
join_df <- inner_join(df1,df2, by = "Index")
join_df$log2FC <- log2(join_df$H_ratio_WL/join_df$H_ratio_WW)
#join_df$P.value <- apply(join_df,1,function(x){t.test(as.numeric(x[3:4]),as.numeric(x[7:8]), paired = T)$p.value})
join_df$set <- comp.df[i,3]
join_df <- join_df %>% separate(set, c("Accession", "Time"))
### Plot the average of ratio between two conditions for common mods
#print(ggscatter(join_df, x = "H_ratio_WL", y = "H_ratio_WW",
#add = "reg.line", conf.int = TRUE,
#cor.coef = TRUE, cor.method = "pearson",
#xlab = "H_ratio_WL", ylab = "H_ratio_WW",
#color = "black", shape = 21, size = 2,
#add.params = list(color = "blue", fill = "lightgray"),
#title= paste0("Ratio correlation of ",comp.df[1,3])))
#assign(paste0(comp.df[i,3], "_pairedGgenes"), join_df)
}
head(join_df, 40)
All genes: Inclusion of paired, WL-specific, and WW-specific modified
sites
### combine the ratio for each
for (i in 1:nrow(comp.df)){
df1 <- get(comp.df[i,1])
df1 <- df1[,c("Index", "Nucleotides","H_ratio1", "H_ratio2","H_ratio","H_total1","H_total2")]
colnames(df1) <- c("Index", "Nucleotids_WL","H_WL_ratio1", "H_WL_ratio2","H_ratio_WL","H_total1_WL","H_total2_WL")
df2 <- get(comp.df[i,2])
df2 <- df2[,c("Index", "Nucleotides","H_ratio1","H_ratio2","H_ratio","H_total1","H_total2")]
colnames(df2) <- c("Index", "Nucleotids_WW","H_WW_ratio1", "H_WW_ratio2","H_ratio_WW","H_total1_WW","H_total2_WW")
### Perform the t.test and logFC calculation (all PTMs)
all_df <- full_join(df1,df2, by = "Index")
all_df[is.na(all_df)] <- 0
all_df$set <- comp.df[i,3]
all_df$group <- "fill"
all_df[all_df$H_ratio_WL == 0,15] <- "WW"
all_df[all_df$H_ratio_WW == 0,15] <- "WL"
all_df[all_df$H_ratio_WW != 0 & all_df$H_ratio_WL != 0,15] <- "both"
all_df$Delta <- all_df$H_ratio_WL-all_df$H_ratio_WW
all_df <- all_df %>% separate(set, c("Accession", "Time"))
### Plot the average of ratio with condition-specific PTMs included
#print(ggscatter(all_df, x = "H_ratio_WL", y = "H_ratio_WW", fill = "group",
#add = "reg.line", conf.int = TRUE,
#cor.coef = TRUE, cor.method = "pearson",
#xlab = "H_ratio_WL", ylab = "H_ratio_WW",
#color = "black", shape = 21, size = 2,
#add.params = list(color = "blue", fill = "lightgray"),
#palette = c(WW="steelblue", WL="tomato", both="grey60"),
#title= paste0("Ratio correlation of ",comp.df[i,3])))
assign(paste0(comp.df[i,3], "_all_genes"), all_df)
}
head(all_df, 40)
### extract the unique index over all samples
comp_list <- lapply(ls(pattern = "_comp_ratio_all_genes"), get)
comp_combine <- bind_rows(comp_list)
nrow(comp_combine)
## [1] 154588
head(comp_combine, 60)
write.csv(comp_combine, "/Users/leon/Documents/Project/Sorghum/PTMs/RMA_comparison_allSites_20230206.csv", row.names = F, quote = F)
Index.df <- data.frame("Index" = unique(comp_combine$Index))
head(Index.df, 80)
write.csv(Index.df, "/Users/leon/Documents/Project/Sorghum/PTMs/RMA_allSites_Index.csv", row.names = F, quote = F)
RMA_cor <- read.csv("/Users/leon/Documents/Project/Sorghum/PTMs/RMA_comparison_allSites_20230206.csv", header = T)
head(RMA_cor)
RMA_cor %>%
ggplot(aes(x=H_ratio_WL, y=H_ratio_WW, color=Time))+
geom_point(size = 1)+
scale_color_manual(values = c(TP1="#D8DAEB", TP3="#B2ABD2", TP5="#8073AC", TP7 ="#542788"))+
theme_bw()+
ggtitle("Comparison of RMF from two conditions of across accessions") +
facet_wrap(~Accession,scale='free_x') +
stat_smooth(aes(color= Time), size = 0.5, level = 0.95) +
geom_abline(slope=1, intercept=0, shape = 2) +
stat_cor()
RMA_cor %>%
ggplot(aes(x=H_ratio_WL, y=H_ratio_WW, color=Accession))+
geom_point(size = 1, alpha = 0.5)+
scale_color_manual(values = c(A="#1B9E77", B="#D95F02", C="#666666", D="#E7298A", E="#66A61E", F="#E6AB02"))+
theme_bw()+
ggtitle("Comparison of RMF from two conditions of across accessions") +
facet_wrap(~Time) +
stat_smooth(aes(color= Accession), size = 0.5, level = 0.95) +
stat_cor()
ggplot(RMA_cor, (aes(x=Accession, y=Delta, fill = Time))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_bw() +
ylab("Delta of RMA between conditions across time")+
scale_fill_manual(values = c(TP1="#D8DAEB", TP3="#B2ABD2", TP5="#8073AC", TP7 ="#542788")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Time), method = "kruskal.test", label = "p.signif") +
stat_summary(fun.data = fun_length, geom = "text",vjust = -20, position = position_dodge(0.75), size = 1, color = "red")
ggplot(RMA_cor, (aes(x=Time, y=Delta, fill = Accession))) +
geom_boxplot(outlier.size = 0.2, outlier.shape = 21) +
theme_bw() +
ylab("Delta of RMA between conditions across time")+
scale_fill_manual(values = c(A="#1B9E77", B="#D95F02", C="#666666", D="#E7298A", E="#66A61E", F="#E6AB02")) +
stat_summary(fun = mean, color = "darkred", position = position_dodge(0.75),
geom = "point", shape = 4, size = 3,
show.legend = FALSE) +
stat_compare_means(aes(group = Accession), method = "kruskal.test", label = "p.signif") +
stat_summary(fun.data = fun_length, geom = "text",vjust = -20, position = position_dodge(0.75), size = 1, color = "red")
MODs_counts_list <-
read.table("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/11_MODs_profile/1_MODs_count/Intersection/sample.list", header = F)
colnames(MODs_counts_list) <- "transcripts.sample"
MODs_counts_list[,1] <- str_replace(MODs_counts_list[,1], ".anno.gene.clean", "")
MODs_counts_list
Gene_frame <- read.table("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/11_MODs_profile/1_MODs_count/WGCNA_transcripts.txt", header = T)
colnames(Gene_frame) <- c("transcripts", "length")
head(Gene_frame)
for (i in 1:nrow(MODs_counts_list)){
PATH <- "/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/11_MODs_profile/1_MODs_count/Intersection/"
sample.df <- read.table(paste0(PATH,MODs_counts_list[i,1],".anno.gene.clean"), header = F)
colnames(sample.df) <- c("transcripts", "interval", "Chr", "Site","MOD1", "MOD2")
### to be exported and join with the depth file
sample.df$Index <- paste0(sample.df$Chr,";",sample.df$Site)
DP.df <- get(merge_DP.list[i,1])
DP2TP <- inner_join(DP.df,sample.df, by = "Index")
### transform into log2
DP2TP$log2H_nonref1 <- log2(DP2TP$H_nonref1)
DP2TP$log2H_total1 <- log2(DP2TP$H_total1)
### plot the correlation at the nucleotide level
#print(ggscatter(DP2TP, x = "log2H_nonref1", y = "log2H_total1",
#add = "reg.line", conf.int = TRUE,
#cor.coef = TRUE, cor.method = "pearson",
#ylab = "log2 total read counts", xlab = "log2 modified read counts",
#color = "black", shape = 21, size = 2,
#add.params = list(color = "blue", fill = "lightgray"),
#title= paste0("Correlation of RNA abundance and modification level of ",MODs_counts_list[i,1])))
DP2TP_clean <- DP2TP[,c("Index","Nucleotides", "MOD1", "MOD2", "transcripts", "M_ratio", "H_ratio")]
DP2TP_clean <- DP2TP_clean %>% group_by(transcripts) %>% mutate(transcripts.count = n())
DP2TP_clean <- left_join(Gene_frame,DP2TP_clean,by = "transcripts")
DP2TP_clean$RML <- DP2TP_clean$transcripts.count/DP2TP_clean$length
count.df <- unique(DP2TP_clean[, c("transcripts", "RML")])
count.df[is.na(count.df)] <- 0
MODs.df <- as.data.frame(count.df[,2])
colnames(MODs.df)<- MODs_counts_list[i,1]
assign(paste(MODs_counts_list[i,1], "_RML"),MODs.df)
assign(paste0(MODs_counts_list[i,1], "_DP2TP"), DP2TP_clean)
}
DP_set <- ls(pattern = "_DP2TP") %>% str_replace("_DP2TP","")
TPM_mean$transcripts <- rownames(TPM_mean)
for (i in 1:length(DP_set)){
DP.df <- get(paste0(DP_set[1], "_DP2TP"))
### modify here if M_ratio needed
DP.TPM <- left_join(DP.df, TPM_mean, by = "transcripts")
DP.TPM.clean <- DP.TPM[,c("Index", "Nucleotides", "MOD1", "MOD2", "transcripts", "transcripts.count", "H_ratio", DP_set[i])]
DP.TPM.clean$H_ratio <- log2(DP.TPM.clean$H_ratio)
DP.TPM.clean[,8] <- log2(DP.TPM.clean[,8])
### Plot transcripts with one modified nucleotides only
# print(ggscatter(DP.TPM.clean[DP.TPM.clean$transcripts.count == 1,], y = "H_ratio", x = DP_set[i],
# add = "reg.line", conf.int = TRUE,
# cor.coef = TRUE, cor.method = "pearson",
# ylab = "log2(H_ratio)", xlab = "log2(TPM)",
# color = "black", shape = 21, size = 2,
#add.params = list(color = "blue", fill = "lightgray"),
#title= paste0("Correlation of RNA abundance and modification level of ",DP_set[i])))
}
### collapse the RML into dataframe
RML_list <- lapply(ls(pattern = " _RML"), get)
RML_combine <- bind_cols(RML_list)
rownames(RML_combine) <- Gene_frame$transcripts
write.csv(RML_combine, "/Users/leon/Documents/Project/Sorghum/Modification_Pattern/RML.csv")
head(RML_combine, 50)
RML_combine_upset <- RML_combine
for (i in 1:ncol(RML_combine_upset)){
RML_combine_upset[,i] <- replace(x = RML_combine_upset [,i],
RML_combine_upset [,i] != 0.000000 ,
values = 1)}
head(RML_combine_upset, 40)
### bind the DP2TP to extract site2genes
DP2TP_list <- lapply(ls(pattern = "_DP2TP"), get)
DP2TP_combine <- bind_rows(DP2TP_list)
head(na.omit(DP2TP_combine), 50)
sites2gene <- unique(DP2TP_combine[c("Index","transcripts", "Nucleotides", "MOD1", "MOD2")]) %>% na.omit()
head(sites2gene, 50)
write.csv(sites2gene, "/Users/leon/Documents/Project/Sorghum/Modification_Pattern/sites2gene.csv")
comp.frame <- data.frame("sample" = str_replace(colnames(RML_combine), "WL_", "") %>% str_replace("WW_", "") %>% unique())
comp.frame$sample2 <- comp.frame$sample
comp.frame <- comp.frame %>% separate(sample2, c("Accession", "Time"))
comp.frame
### combine the information
for (i in 1:nrow(comp.frame)){
RML.temp <- RML_combine[,grepl(comp.frame[i,2], colnames(RML_combine))]
RML.temp <- RML.temp[,grepl(comp.frame[i,3], colnames(RML.temp))]
colnames(RML.temp) <- c("WL_RML", "WW_RML")
RML.temp$Accession <- comp.frame[i,2]
RML.temp$Time <- comp.frame[i,3]
RML.temp$total <- RML.temp$WL_RML + RML.temp$WW_RML
RML.temp <- RML.temp[RML.temp$total != 0,c("WL_RML", "WW_RML", "Accession", "Time")]
RML.temp$transcripts <- rownames(RML.temp)
rownames(RML.temp) <- NULL
### reformat TPM to RML table
EXT1 <- TPM_mean[,grepl(comp.frame[i,2],colnames(TPM_mean))]
EXT2 <- EXT1[, grepl(comp.frame[i,3],colnames(EXT1))]
EXT2$transcripts <- TPM_mean$transcripts
### reformat logFC to RML table
logFC_temp <- Gene_logFC[Gene_logFC$Accession == comp.frame[i,2] & Gene_logFC$Time == comp.frame[i,3],
c("transcripts", "log2FoldChange", "Accession", "Time")]
RML_temp2 <- left_join(RML.temp, EXT2, by = "transcripts")
RML_join <- left_join(RML_temp2, logFC_temp, by = "transcripts")
colnames(RML_join) <- c("WL_RML", "WW_RML", "Accession.x", "Time.x", "transcripts", "WW_TPM", "WL_TPM", "log2FoldChange", "Accession", "Time")
RML_join_clean <- RML_join[, c("WL_RML", "WW_RML", "transcripts", "WW_TPM", "WL_TPM", "log2FoldChange", "Accession", "Time")]
#colnames(RML_join)
assign(paste0(comp.frame[i,1], "_RML_stats"), RML_join_clean)
}
head(RML_join_clean, 40)
RML_merge <- lapply(ls(pattern = "_RML_stats"), get)
RML_all <- bind_rows(RML_merge)
head(RML_all, 50)
### Classify groups based on RML and TPM
RML_all$group <- "both"
RML_all[RML_all$WL_RML == 0,"group"] <- "WW"
RML_all[RML_all$WW_RML == 0,"group"] <- "WL"
nrow(RML_all[RML_all$group == "both",])
## [1] 26196
nrow(RML_all[RML_all$group != "both",])
## [1] 17286
Acc <- c("A","B","C","D","E","F")
Time <- c("TP1","TP3","TP5","TP7")
### define the summary table
IS_summary <- data.frame("Accession"=rep(Acc, times = 4),
"Time"= rep(Time, each = 6))
IS_summary$WL_specifc <- "Fill"
IS_summary$WW_specifc <- "Fill"
IS_summary$Overlap <- "Fill"
IS_plot <- melt(IS_summary, id.vars=c("Accession", "Time"))
IS_plot$value <- as.numeric(IS_plot$value)
IS_plot$Index <- paste0(IS_plot$Accession, IS_plot$variable)
IS_plot$variable <- as.character(IS_plot$variable)
IS_plot
IS_plot %>% ggplot(aes(x=Time, y = value, color= Accession, group = Index)) +
geom_line(alpha = 0.8,size = 1, aes(shape = variable))+
theme_bw()+
scale_color_manual(values = c(A="#1B9E77", B="#D95F02", C="#666666", D="#E7298A", E="#66A61E", F="#E6AB02")) +
ylab("Numbers of modified transcripts")
## Warning in geom_line(alpha = 0.8, size = 1, aes(shape = variable)): Ignoring
## unknown aesthetics: shape
merge_LS <- lapply(ls(pattern = "_WL_specific"), get)
WL_specific_combine <- bind_rows(merge_LS)
head(WL_specific_combine)
merge_WS <- lapply(ls(pattern = "_WW_specific"), get)
WW_specific_combine <- bind_rows(merge_WS)
head(WW_specific_combine)
Observe the WL and WW-specific modified transcripts shift over time or over accessions
WL_meta_gene <- data.frame("WL_Sgene"=unique(WL_specific_combine[,1]))
WW_meta_gene <- data.frame("WW_Sgene"=unique(WW_specific_combine[,1]))
WL_sample <- ls(pattern = "_WL_specific")
WW_sample <- ls(pattern = "_WW_specific")
WL_sample
## [1] "A_TP1_WL_specific" "A_TP3_WL_specific" "A_TP5_WL_specific"
## [4] "A_TP7_WL_specific" "B_TP1_WL_specific" "B_TP3_WL_specific"
## [7] "B_TP5_WL_specific" "B_TP7_WL_specific" "C_TP1_WL_specific"
## [10] "C_TP3_WL_specific" "C_TP5_WL_specific" "C_TP7_WL_specific"
## [13] "D_TP1_WL_specific" "D_TP3_WL_specific" "D_TP5_WL_specific"
## [16] "D_TP7_WL_specific" "E_TP1_WL_specific" "E_TP3_WL_specific"
## [19] "E_TP5_WL_specific" "E_TP7_WL_specific" "F_TP1_WL_specific"
## [22] "F_TP3_WL_specific" "F_TP5_WL_specific" "F_TP7_WL_specific"
WW_sample
## [1] "A_TP1_WW_specific" "A_TP3_WW_specific" "A_TP5_WW_specific"
## [4] "A_TP7_WW_specific" "B_TP1_WW_specific" "B_TP3_WW_specific"
## [7] "B_TP5_WW_specific" "B_TP7_WW_specific" "C_TP1_WW_specific"
## [10] "C_TP3_WW_specific" "C_TP5_WW_specific" "C_TP7_WW_specific"
## [13] "D_TP1_WW_specific" "D_TP3_WW_specific" "D_TP5_WW_specific"
## [16] "D_TP7_WW_specific" "E_TP1_WW_specific" "E_TP3_WW_specific"
## [19] "E_TP5_WW_specific" "E_TP7_WW_specific" "F_TP1_WW_specific"
## [22] "F_TP3_WW_specific" "F_TP5_WW_specific" "F_TP7_WW_specific"
### For WL-sample
for (i in 1:length(WL_sample)){
temp.df <- get(WL_sample[i])
temp.df$type <- 1
MODs.df <- left_join(WL_meta_gene,temp.df,by="WL_Sgene")
MODs.df <- as.data.frame(MODs.df[,2])
colnames(MODs.df)<- WL_sample[i]
assign(paste0(WL_sample[i], "_WL_bind"),MODs.df)
}
col_LS <- lapply(ls(pattern = "_WL_bind"), get)
WL_specific_bind <- bind_cols(col_LS)
rownames(WL_specific_bind) <- WL_meta_gene[,1]
WL_specific_bind[is.na(WL_specific_bind)] <- 0
head(WL_specific_bind, 20)
### For WW-sample
for (i in 1:length(WW_sample)){
temp.df <- get(WW_sample[i])
temp.df$type <- 1
MODs.df <- left_join(WW_meta_gene,temp.df,by="WW_Sgene")
MODs.df <- as.data.frame(MODs.df[,2])
colnames(MODs.df)<- WW_sample[i]
assign(paste0(WW_sample[i], "_WW_bind"),MODs.df)
}
col_LS <- lapply(ls(pattern = "_WW_bind"), get)
WW_specific_bind <- bind_cols(col_LS)
rownames(WW_specific_bind) <- WW_meta_gene[,1]
WW_specific_bind[is.na(WW_specific_bind)] <- 0
head(WW_specific_bind, 20)
Acc <- c("A","B","C","D","E","F")
for (i in 1:length(Acc)){
print(upset(WL_specific_bind[,grepl(paste0(Acc[i], "_"),colnames(WL_specific_bind))],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(TRUE,TRUE),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 1,
point.size = 2,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"))
}
for (i in 1:length(Acc)){
print(upset(WW_specific_bind[,grepl(paste0(Acc[i], "_"),colnames(WW_specific_bind))],
nintersects = 200,
nsets = 4,
order.by = c("freq", "degree"),
decreasing = c(TRUE,TRUE),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 1,
point.size = 2,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"))
}
Time <- c("TP1","TP3","TP5","TP7")
### Plot upset plot with per accession over time
for (i in 1:length(Time)){
print(upset(WL_specific_bind[,grepl(paste0(Time[i], "_"),colnames(WL_specific_bind))],
nintersects = 200,
nsets = 6,
order.by = c("freq", "degree"),
decreasing = c(TRUE,TRUE),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 1,
point.size = 2,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"))
}
for (i in 1:length(Time)){
print(upset(WW_specific_bind[,grepl(paste0(Time[i], "_"),colnames(WW_specific_bind))],
nintersects = 200,
nsets = 6,
order.by = c("freq", "degree"),
decreasing = c(TRUE,TRUE),
keep.order = T,
mb.ratio = c(0.5, 0.5),
number.angles = 0,
text.scale = 1,
point.size = 2,
line.size = 1,
matrix.color = "steelblue",
sets.bar.color = "tomato"))
}