library(dplyr)
library(pheatmap)
library(stringr)
library(zFPKM)
library(RColorBrewer)
library(viridis)
library(matrixStats)
library(ggplot2)
library(ggpubr)
### Library for Pearson correlation
library(Hmisc)
MM_score <- read.csv("/Users/leon/Documents/Project/Sorghum/CorrelationGene_module_membership_vsd100.csv", header= T)
colnames(MM_score)[1] <- "id"
colnames(MM_score) <- str_replace(colnames(MM_score), "MM", "")
Module_color <- colnames(MM_score[,-1])
### Load ids which associated with the module color
TranscriptsID <- read.table("/Users/leon/Documents/Project/Sorghum/Transcripts.matchinglist.txt", header = T)
head(TranscriptsID, 20)
#Extracting certain module
for (i in 1:length(Module_color)){
ID_temp <- TranscriptsID[TranscriptsID$module == Module_color[i], ]
MM_temp <- MM_score[,c("id", Module_color[i])]
MM_summary <- left_join(ID_temp,MM_temp, by = "id")
colnames(MM_summary)[3] <- "MM_score"
assign(paste0(Module_color[i], "_MM_summary"),MM_summary)
}
MM_list <- lapply(ls(pattern = "_MM_summary"), get)
MM_combine <- bind_rows(MM_list)
head(MM_combine, 20)
write.csv(MM_combine, "/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv")
### Load TPM of selected genes
Zscore_vsd <- read.csv("/Users/leon/Documents/Project/Sorghum/3_Zscore_vsd_20230101.csv", header = T, row.names = 1)
head(Zscore_vsd, 10)
RMF <- read.csv("/Users/leon/Documents/Project/Sorghum/3_RMF-20230101.csv", header = T, row.names = 1)
head(RMF, 10)
Zscore_vsd_module <- left_join(MM_combine[,1:2],Zscore_vsd %>% mutate(id = rownames(Zscore_vsd)), by = "id")
rownames(Zscore_vsd_module) <- Zscore_vsd_module$id
Zscore_vsd_module <- Zscore_vsd_module[,-1]
RMF_module <- left_join(MM_combine[,1:2],RMF %>% mutate(id = rownames(RMF)), by = "id")
#RMF_module[is.na(RMF_module)] <- 0
RMF_module <- na.omit(RMF_module)
rownames(RMF_module) <- RMF_module$id
RMF_module <- RMF_module[,-1]
RMF_module <- RMF_module[,-50]
head(Zscore_vsd_module,20)
head(RMF_module,20)
sample_meta <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/12_Heatmap/Heatmap_MetaTable.csv", header = T, row.names = 1)
sample_meta
Heatmap 1 is gene expression profile Heatmap 2 is PTM profile
module_name <- Zscore_vsd_module$module %>% unique()
for (i in 1:length(module_name)){
### define annotation coloes
annotation_colors = list(
Condition = c(WL="tomato", WW="steelblue"),
Time = c(TP1="#D8DAEB", TP3="#B2ABD2", TP5="#8073AC", TP7 ="#542788"),
Accession = c(A="#1B9E77", B="#D95F02", C="#666666", D="#E7298A", E="#66A61E", F="#E6AB02"))
Module_Plot1 <- Zscore_vsd_module[Zscore_vsd_module$module == module_name[i],-1]
### check if colnames and rownaems of annotation matched genes names and the samples namases
all(rownames(sample_meta) %in% colnames(Module_Plot1))
par(mfrow = c(1,2));
pheatmap(Module_Plot1, annotation_col = sample_meta[,1:3],
color = inferno(30),
annotation_colors = annotation_colors,
clustering_distance_r = "correlation",
show_colnames =T,show_rownames = F,cluster_rows = T, cluster_cols = T,
fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
Module_Plot2 <- RMF_module[RMF_module$module == module_name[i],-1]
### check if colnames and rownaems of annotation matched genes names and the samples namases
all(rownames(sample_meta) %in% colnames(Module_Plot2))
pheatmap(Module_Plot2, annotation_col = sample_meta[,1:3],
color = inferno(30),
annotation_colors = annotation_colors,
clustering_distance_r = "euclidean",
show_colnames =T,show_rownames = F,cluster_rows = T, cluster_cols = T,
fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
}
## Select genes associated with high GS-MM correlation
LIM <- read.csv("/Users/leon/Documents/Project/Sorghum/LIM_GS.csv", header = T)
colnames(LIM)[1] = "id"
Oxidative <- read.csv("/Users/leon/Documents/Project/Sorghum/Oxidative_GS.csv", header = T)
colnames(Oxidative)[1] = "id"
Metabolite <- read.csv("/Users/leon/Documents/Project/Sorghum/Metabolites_GS.csv", header = T)
colnames(Metabolite)[1] = "id"
MM <- read.csv("/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv",
header = T, row.names = 1)
head(MM,20)
### lable the module information under the GS files
LIM_clean <- left_join(MM, LIM, by = "id")
Oxidative_clean <- left_join(MM, Oxidative, by = "id")
Metabolites_clean <- left_join(MM, Metabolite, by = "id")
head(LIM_clean, 20)
head(Oxidative_clean, 20)
head(Metabolites_clean, 20)
GeneID <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/vsd100/Functional_annotation.csv",header = T)
head(GeneID, 20)
Meta1 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_oxidative_meta.csv", header = T)
Meta1
### Create sub gene list
for (i in 1:nrow(Meta1)){
temp_df <- Oxidative_clean[Oxidative_clean$module == Meta1[i,1],c("id","MM_score",Meta1[i,2])]
temp_df <- na.omit(temp_df[order(temp_df[,3]),])
### Filter by GS and MM
temp_df$ABS_MM <- abs(temp_df[,2])
temp_df$ABS_GS <- abs(temp_df[,3])
temp_df$Group <- "FALSE"
temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
temp_clean <- temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,]
temp_clean <- left_join(temp_clean,GeneID, by = "id")
write.csv(temp_clean,
paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta1[i,1], "_",Meta1[i,2], ".csv"), quote = F, row.names = F)
assign(paste0(Meta1[i,1], "_",Meta1[i,2], "_oxidative_item"), temp_df)
}
### Checked the merged file
head(temp_clean, 20)
### List item for further plot
Meta1_list <- ls(pattern = paste0("_oxidative_item"))
Meta1_list
## [1] "cyan_flav_oxidative_item"
## [2] "cyan_Grxs_oxidative_item"
## [3] "cyan_polyph_oxidative_item"
## [4] "cyan_TAC_oxidative_item"
## [5] "darkgrey_Grxs_oxidative_item"
## [6] "darkmagenta_ascorbate_oxidative_item"
## [7] "darkmagenta_flav_oxidative_item"
## [8] "darkmagenta_polyph_oxidative_item"
## [9] "darkmagenta_TAC_oxidative_item"
## [10] "ivory_APX_oxidative_item"
## [11] "ivory_CAT_oxidative_item"
## [12] "ivory_flav_oxidative_item"
## [13] "ivory_HPR_oxidative_item"
## [14] "ivory_polyph_oxidative_item"
## [15] "ivory_pro_oxidative_item"
## [16] "ivory_Prxs_oxidative_item"
## [17] "ivory_TAC_oxidative_item"
## [18] "paleturquoise_total_AA_oxidative_item"
## [19] "plum1_flav_oxidative_item"
## [20] "salmon_flav_oxidative_item"
## [21] "salmon_polyph_oxidative_item"
## [22] "salmon_TAC_oxidative_item"
## [23] "yellow_total_AA_oxidative_item"
## [24] "yellowgreen_Frxs_oxidative_item"
## [25] "yellowgreen_GPX_oxidative_item"
## [26] "yellowgreen_Trxs_oxidative_item"
for (i in 1:length(Meta1_list)){
which_module <- Meta1_list[i]
### Manual plot samples
print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
geom_point(size=1.5)+
scale_colour_manual(values=c("grey60", "#DE6757")) +
theme_bw()+
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+
labs(x=paste0("Module Membership in ", Meta1_list[i] ),
y=paste0("Gene significance for ", Meta1_list[i]))+
theme(axis.title.x =element_text(size=14),
axis.title.y=element_text(size=14),
axis.text = element_text(size = 12),
axis.text.x = element_text(colour = "black"),
axis.text.y = element_text(colour = "black"),
plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
plot.margin = unit(rep(2,4),'lines')) +
theme(legend.position = 'none')+
geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5))
}
Meta2 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_LIC_meta.csv", header = T)
Meta2
### Create sub gene list
for (i in 1:nrow(Meta2)){
temp_df <- LIM_clean[LIM_clean$module == Meta2[i,1],c("id","MM_score",Meta2[i,2])]
temp_df <- na.omit(temp_df[order(temp_df[,3]),])
### Filter by GS and MM
temp_df$ABS_MM <- abs(temp_df[,2])
temp_df$ABS_GS <- abs(temp_df[,3])
temp_df$Group <- "FALSE"
temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
temp_clean <- temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,]
temp_clean <- left_join(temp_clean,GeneID, by = "id")
write.csv(temp_clean,
paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta2[i,1], "_",Meta2[i,2], ".csv"), quote = F, row.names = F)
assign(paste0(Meta2[i,1], "_",Meta2[i,2], "_LIC_item"), temp_df)
}
### Checked the merged file
head(temp_clean, 20)
### List item for further plot
Meta2_list <- ls(pattern = paste0("_LIC_item"))
Meta2_list
## [1] "cyan_A_LIC_item" "cyan_Ci_LIC_item"
## [3] "cyan_Tair_LIC_item" "cyan_Tleaf_LIC_item"
## [5] "darkmagenta_A_LIC_item" "darkmagenta_Ci_LIC_item"
## [7] "darkmagenta_gsw_LIC_item" "darkmagenta_Tair_LIC_item"
## [9] "darkmagenta_Tleaf_LIC_item" "darkorange_Fo._LIC_item"
## [11] "darkturquoise_A_LIC_item" "grey60_A_LIC_item"
## [13] "grey60_Fv..Fm._LIC_item" "lightcyan_Fo._LIC_item"
## [15] "lightyellow_Fm._LIC_item" "lightyellow_Fv._LIC_item"
## [17] "lightyellow_Fv..Fm._LIC_item" "magenta_Ci_LIC_item"
## [19] "magenta_Fm._LIC_item" "magenta_WUEi_LIC_item"
## [21] "red_Tair_LIC_item" "red_Tleaf_LIC_item"
## [23] "salmon_A_LIC_item" "salmon_Ci_LIC_item"
## [25] "salmon_Fv..Fm._LIC_item" "salmon_gsw_LIC_item"
## [27] "tan_Fm._LIC_item" "tan_Fv._LIC_item"
## [29] "tan_Fv..Fm._LIC_item" "violet_Fo._LIC_item"
for (i in 1:length(Meta2_list)){
which_module <- Meta2_list[i]
### Manual plot samples
print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
geom_point(size=1.5)+
scale_colour_manual(values=c("grey60", "#DE6757")) +
theme_bw()+
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+
labs(x=paste0("Module Membership in ", Meta1_list[i] ),
y=paste0("Gene significance for ", Meta1_list[i]))+
theme(axis.title.x =element_text(size=14),
axis.title.y=element_text(size=14),
axis.text = element_text(size = 12),
axis.text.x = element_text(colour = "black"),
axis.text.y = element_text(colour = "black"),
plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
plot.margin = unit(rep(2,4),'lines')) +
theme(legend.position = 'none')+
geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5))
}
Meta3 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_Metabolites_meta.csv", header = T)
Meta3
### Create sub gene list
for (i in 1:nrow(Meta3)){
temp.df <- Metabolites_clean[Metabolites_clean$module == Meta3[i,1],c("id","MM_score",Meta3[i,2])]
temp_df <- na.omit(temp.df[order(temp.df[,3]),])
### Filter by GS and MM
temp_df$ABS_MM <- abs(temp_df[,2])
temp_df$ABS_GS <- abs(temp_df[,3])
temp_df$Group <- "FALSE"
temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
temp_clean <- temp_df[temp_df$ABS_MM > 0.6 & temp_df$ABS_GS > 0.5,]
temp_clean <- left_join(temp_clean,GeneID, by = "id")
write.csv(temp_clean, paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta3[i,1], "_",Meta3[i,2], ".csv"), quote = F, row.names = F)
assign(paste0(Meta3[i,1], "_",Meta3[i,2], "_Metabolites_item"), temp_df)
}
### Checked the merged file
head(temp_clean, 20)
### List item for further plot
Meta3_list <- ls(pattern = paste0("_Metabolites_item"))
Meta3_list
## [1] "blue_Alpha_ketoglutaric_acid_Metabolites_item"
## [2] "blue_D.malic_acid_Metabolites_item"
## [3] "blue_Ethanolamine_Metabolites_item"
## [4] "blue_Glyceric_acid_Metabolites_item"
## [5] "blue_Succinic_acid_Metabolites_item"
## [6] "darkmagenta_Galactonic_acid_2_Metabolites_item"
## [7] "darkolivegreen_Ethanolamine_Metabolites_item"
## [8] "darkolivegreen_Glyceric_acid_Metabolites_item"
## [9] "darkorange_Beta.alanine_1_Metabolites_item"
## [10] "darkorange_Ethanolamine_Metabolites_item"
## [11] "darkorange2_Beta.alanine_1_Metabolites_item"
## [12] "darkorange2_D.glucose_1_Metabolites_item"
## [13] "darkorange2_Fructose_1_Metabolites_item"
## [14] "lightsteelblue1_Alpha_ketoglutaric_acid_Metabolites_item"
## [15] "lightyellow_Chlorogenic_acid_4_Metabolites_item"
## [16] "lightyellow_D.glucose_1_Metabolites_item"
## [17] "lightyellow_Ferulic_acid_Metabolites_item"
## [18] "lightyellow_Galactonic_acid_2_Metabolites_item"
## [19] "lightyellow_Glyceric_acid_Metabolites_item"
## [20] "magenta_Beta.alanine_1_Metabolites_item"
## [21] "magenta_Chlorogenic_acid_3_Metabolites_item"
## [22] "magenta_Chlorogenic_acid_4_Metabolites_item"
## [23] "magenta_DL.isoleucine_2_Metabolites_item"
## [24] "magenta_GABA_Metabolites_item"
## [25] "magenta_Galactonic_acid_2_Metabolites_item"
## [26] "salmon_Chlorogenic_acid_3_Metabolites_item"
## [27] "salmon_Chlorogenic_acid_4_Metabolites_item"
## [28] "salmon_Galactonic_acid_2_Metabolites_item"
## [29] "skyblue_Alpha_ketoglutaric_acid_Metabolites_item"
## [30] "skyblue_Chlorogenic_acid_3_Metabolites_item"
## [31] "skyblue_Glyceric_acid_Metabolites_item"
## [32] "violet_Citric_acid_Metabolites_item"
for (i in 1:length(Meta3_list)){
which_module <- Meta3_list[i]
### Manual plot samples
print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
geom_point(size=1.5)+
scale_colour_manual(values=c("grey60", "#DE6757")) +
theme_bw()+
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+
labs(x=paste0("Module Membership in ", Meta3_list[i] ),
y=paste0("Gene significance for ", Meta3_list[i]))+
theme(axis.title.x =element_text(size=14),
axis.title.y=element_text(size=14),
axis.text = element_text(size = 12),
axis.text.x = element_text(colour = "black"),
axis.text.y = element_text(colour = "black"),
plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
plot.margin = unit(rep(2,4),'lines')) +
theme(legend.position = 'none')+
geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5))
}