Profiles analysis

Visulize the heatmap using expression data and PTMs data

Load packages

library(dplyr)
library(pheatmap)
library(stringr)
library(zFPKM)
library(RColorBrewer)
library(viridis)
library(matrixStats)
library(ggplot2)
library(ggpubr)

### Library for Pearson correlation
library(Hmisc)

Filter module membership for each gene (MM)

MM_score <- read.csv("/Users/leon/Documents/Project/Sorghum/CorrelationGene_module_membership_vsd100.csv", header= T)
colnames(MM_score)[1] <- "id"
colnames(MM_score) <- str_replace(colnames(MM_score), "MM", "")
Module_color <- colnames(MM_score[,-1])

### Load ids which associated with the module color
TranscriptsID <- read.table("/Users/leon/Documents/Project/Sorghum/Transcripts.matchinglist.txt", header = T)
head(TranscriptsID, 20)
#Extracting certain module
for (i in 1:length(Module_color)){
  ID_temp <- TranscriptsID[TranscriptsID$module == Module_color[i], ]
  MM_temp <- MM_score[,c("id", Module_color[i])]
  MM_summary <- left_join(ID_temp,MM_temp, by = "id")
  colnames(MM_summary)[3] <- "MM_score"
  
  assign(paste0(Module_color[i], "_MM_summary"),MM_summary)
}

MM_list <- lapply(ls(pattern = "_MM_summary"), get)
MM_combine <- bind_rows(MM_list)

head(MM_combine, 20) 
write.csv(MM_combine, "/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv")

Load TPM data and selected genes for visulizations

### Load TPM of selected genes
Zscore_vsd <- read.csv("/Users/leon/Documents/Project/Sorghum/3_Zscore_vsd_20230101.csv", header = T, row.names = 1)
head(Zscore_vsd, 10)
RMF <- read.csv("/Users/leon/Documents/Project/Sorghum/3_RMF-20230101.csv", header = T, row.names = 1)
head(RMF, 10)
Zscore_vsd_module <- left_join(MM_combine[,1:2],Zscore_vsd %>% mutate(id = rownames(Zscore_vsd)), by = "id")
rownames(Zscore_vsd_module) <- Zscore_vsd_module$id
Zscore_vsd_module <- Zscore_vsd_module[,-1]

RMF_module <- left_join(MM_combine[,1:2],RMF %>% mutate(id = rownames(RMF)), by = "id")
#RMF_module[is.na(RMF_module)] <- 0
RMF_module <- na.omit(RMF_module)
rownames(RMF_module) <- RMF_module$id
RMF_module <- RMF_module[,-1]
RMF_module <- RMF_module[,-50]

head(Zscore_vsd_module,20)
head(RMF_module,20)

Load metafeatures

sample_meta <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/12_Heatmap/Heatmap_MetaTable.csv", header = T, row.names = 1)

sample_meta

Plot heatmap based on assigned metafeature and Z-score transformed expression value

Heatmap 1 is gene expression profile Heatmap 2 is PTM profile

module_name <- Zscore_vsd_module$module %>% unique()

for (i in 1:length(module_name)){
  ### define annotation coloes
  annotation_colors = list(
    Condition = c(WL="tomato", WW="steelblue"),
    Time = c(TP1="#D8DAEB", TP3="#B2ABD2", TP5="#8073AC", TP7 ="#542788"),
    Accession = c(A="#1B9E77", B="#D95F02", C="#666666", D="#E7298A", E="#66A61E", F="#E6AB02"))

  Module_Plot1 <- Zscore_vsd_module[Zscore_vsd_module$module == module_name[i],-1]
  ### check if colnames and rownaems of annotation matched genes names and the samples namases
  all(rownames(sample_meta) %in% colnames(Module_Plot1))
  
  par(mfrow = c(1,2));
  pheatmap(Module_Plot1, annotation_col = sample_meta[,1:3],
               color = inferno(30),
               annotation_colors = annotation_colors, 
               clustering_distance_r = "correlation", 
               show_colnames =T,show_rownames = F,cluster_rows = T, cluster_cols = T,
               fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
  
  Module_Plot2 <- RMF_module[RMF_module$module == module_name[i],-1]
  ### check if colnames and rownaems of annotation matched genes names and the samples namases
  all(rownames(sample_meta) %in% colnames(Module_Plot2))
  
  pheatmap(Module_Plot2, annotation_col = sample_meta[,1:3],
               color = inferno(30),
               annotation_colors = annotation_colors, 
               clustering_distance_r = "euclidean", 
               show_colnames =T,show_rownames = F,cluster_rows = T, cluster_cols = T,
               fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
 }

## Select genes associated with high GS-MM correlation

Load MM and GS files

LIM <- read.csv("/Users/leon/Documents/Project/Sorghum/LIM_GS.csv", header = T)
colnames(LIM)[1] = "id"
Oxidative <- read.csv("/Users/leon/Documents/Project/Sorghum/Oxidative_GS.csv", header = T)
colnames(Oxidative)[1] = "id"
Metabolite <- read.csv("/Users/leon/Documents/Project/Sorghum/Metabolites_GS.csv", header = T)
colnames(Metabolite)[1] = "id"

MM <- read.csv("/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv", 
               header = T, row.names = 1)
head(MM,20)
### lable the module information under the GS files
LIM_clean <- left_join(MM, LIM, by = "id")
Oxidative_clean <- left_join(MM, Oxidative, by = "id")
Metabolites_clean <- left_join(MM, Metabolite, by = "id")

head(LIM_clean, 20)
head(Oxidative_clean, 20)
head(Metabolites_clean, 20)

Load gene annotation information

GeneID <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/vsd100/Functional_annotation.csv",header = T)
head(GeneID, 20) 

Load metatable containing the interested module-trait combinations (oxdative data)

Meta1 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_oxidative_meta.csv", header = T)
Meta1 
### Create sub gene list 
for (i in 1:nrow(Meta1)){
  temp_df <- Oxidative_clean[Oxidative_clean$module == Meta1[i,1],c("id","MM_score",Meta1[i,2])]
  temp_df <- na.omit(temp_df[order(temp_df[,3]),])
  ### Filter by GS and MM
  temp_df$ABS_MM <- abs(temp_df[,2])
  temp_df$ABS_GS <- abs(temp_df[,3])
  temp_df$Group <- "FALSE"
  temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
  temp_clean <- temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,]
  temp_clean <- left_join(temp_clean,GeneID, by = "id")
  
  write.csv(temp_clean, 
            paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta1[i,1], "_",Meta1[i,2], ".csv"), quote = F, row.names = F)
  assign(paste0(Meta1[i,1], "_",Meta1[i,2], "_oxidative_item"), temp_df)
}

### Checked the merged file
head(temp_clean, 20)
### List item for further plot
Meta1_list <- ls(pattern = paste0("_oxidative_item"))
Meta1_list
##  [1] "cyan_flav_oxidative_item"             
##  [2] "cyan_Grxs_oxidative_item"             
##  [3] "cyan_polyph_oxidative_item"           
##  [4] "cyan_TAC_oxidative_item"              
##  [5] "darkgrey_Grxs_oxidative_item"         
##  [6] "darkmagenta_ascorbate_oxidative_item" 
##  [7] "darkmagenta_flav_oxidative_item"      
##  [8] "darkmagenta_polyph_oxidative_item"    
##  [9] "darkmagenta_TAC_oxidative_item"       
## [10] "ivory_APX_oxidative_item"             
## [11] "ivory_CAT_oxidative_item"             
## [12] "ivory_flav_oxidative_item"            
## [13] "ivory_HPR_oxidative_item"             
## [14] "ivory_polyph_oxidative_item"          
## [15] "ivory_pro_oxidative_item"             
## [16] "ivory_Prxs_oxidative_item"            
## [17] "ivory_TAC_oxidative_item"             
## [18] "paleturquoise_total_AA_oxidative_item"
## [19] "plum1_flav_oxidative_item"            
## [20] "salmon_flav_oxidative_item"           
## [21] "salmon_polyph_oxidative_item"         
## [22] "salmon_TAC_oxidative_item"            
## [23] "yellow_total_AA_oxidative_item"       
## [24] "yellowgreen_Frxs_oxidative_item"      
## [25] "yellowgreen_GPX_oxidative_item"       
## [26] "yellowgreen_Trxs_oxidative_item"

Plot MM-GS scater plot with cutoff marked (Osdative stress data)

for (i in 1:length(Meta1_list)){
  which_module <- Meta1_list[i]
      ### Manual plot samples
    print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
      geom_point(size=1.5)+
      scale_colour_manual(values=c("grey60", "#DE6757")) + 
      theme_bw()+ 
      theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+  
      labs(x=paste0("Module Membership in ", Meta1_list[i] ), 
           y=paste0("Gene significance for ", Meta1_list[i]))+
      theme(axis.title.x =element_text(size=14), 
            axis.title.y=element_text(size=14),
            axis.text = element_text(size = 12),
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"),
            plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
            plot.margin = unit(rep(2,4),'lines')) +
      theme(legend.position = 'none')+
      geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
      geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5)) 
}

Load metatable containing the interested module-trait combinations (LIC data)

Meta2 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_LIC_meta.csv", header = T)
Meta2

Build the LIC - module selection

### Create sub gene list 
for (i in 1:nrow(Meta2)){
  temp_df <- LIM_clean[LIM_clean$module == Meta2[i,1],c("id","MM_score",Meta2[i,2])]
  temp_df <- na.omit(temp_df[order(temp_df[,3]),])
  ### Filter by GS and MM
  temp_df$ABS_MM <- abs(temp_df[,2])
  temp_df$ABS_GS <- abs(temp_df[,3])
  temp_df$Group <- "FALSE"
  temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
  temp_clean <- temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,]
  temp_clean <- left_join(temp_clean,GeneID, by = "id")
  
  write.csv(temp_clean, 
            paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta2[i,1], "_",Meta2[i,2], ".csv"), quote = F, row.names = F)
  assign(paste0(Meta2[i,1], "_",Meta2[i,2], "_LIC_item"), temp_df)
}

### Checked the merged file
head(temp_clean, 20)
### List item for further plot
Meta2_list <- ls(pattern = paste0("_LIC_item"))
Meta2_list
##  [1] "cyan_A_LIC_item"              "cyan_Ci_LIC_item"            
##  [3] "cyan_Tair_LIC_item"           "cyan_Tleaf_LIC_item"         
##  [5] "darkmagenta_A_LIC_item"       "darkmagenta_Ci_LIC_item"     
##  [7] "darkmagenta_gsw_LIC_item"     "darkmagenta_Tair_LIC_item"   
##  [9] "darkmagenta_Tleaf_LIC_item"   "darkorange_Fo._LIC_item"     
## [11] "darkturquoise_A_LIC_item"     "grey60_A_LIC_item"           
## [13] "grey60_Fv..Fm._LIC_item"      "lightcyan_Fo._LIC_item"      
## [15] "lightyellow_Fm._LIC_item"     "lightyellow_Fv._LIC_item"    
## [17] "lightyellow_Fv..Fm._LIC_item" "magenta_Ci_LIC_item"         
## [19] "magenta_Fm._LIC_item"         "magenta_WUEi_LIC_item"       
## [21] "red_Tair_LIC_item"            "red_Tleaf_LIC_item"          
## [23] "salmon_A_LIC_item"            "salmon_Ci_LIC_item"          
## [25] "salmon_Fv..Fm._LIC_item"      "salmon_gsw_LIC_item"         
## [27] "tan_Fm._LIC_item"             "tan_Fv._LIC_item"            
## [29] "tan_Fv..Fm._LIC_item"         "violet_Fo._LIC_item"

Plot MM-GS scater plot with cutoff marked for LIC data

for (i in 1:length(Meta2_list)){
  which_module <- Meta2_list[i]
      ### Manual plot samples
    print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
      geom_point(size=1.5)+
      scale_colour_manual(values=c("grey60", "#DE6757")) + 
      theme_bw()+ 
      theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+  
      labs(x=paste0("Module Membership in ", Meta1_list[i] ), 
           y=paste0("Gene significance for ", Meta1_list[i]))+
      theme(axis.title.x =element_text(size=14), 
            axis.title.y=element_text(size=14),
            axis.text = element_text(size = 12),
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"),
            plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
            plot.margin = unit(rep(2,4),'lines')) +
      theme(legend.position = 'none')+
      geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
      geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5)) 
}

Interested module-trait combinations (metabolites data)

Meta3 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_Metabolites_meta.csv", header = T)
Meta3 

Build the metabolites - module selection

### Create sub gene list 
for (i in 1:nrow(Meta3)){
  temp.df <- Metabolites_clean[Metabolites_clean$module == Meta3[i,1],c("id","MM_score",Meta3[i,2])]
  temp_df <- na.omit(temp.df[order(temp.df[,3]),])
  ### Filter by GS and MM
  temp_df$ABS_MM <- abs(temp_df[,2])
  temp_df$ABS_GS <- abs(temp_df[,3])
  temp_df$Group <- "FALSE"
  temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
  temp_clean <- temp_df[temp_df$ABS_MM > 0.6 & temp_df$ABS_GS > 0.5,]
  temp_clean <- left_join(temp_clean,GeneID, by = "id")
  
  write.csv(temp_clean, paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta3[i,1], "_",Meta3[i,2], ".csv"), quote = F, row.names = F)
  assign(paste0(Meta3[i,1], "_",Meta3[i,2], "_Metabolites_item"), temp_df)
}

### Checked the merged file
head(temp_clean, 20)
### List item for further plot
Meta3_list <- ls(pattern = paste0("_Metabolites_item"))
Meta3_list
##  [1] "blue_Alpha_ketoglutaric_acid_Metabolites_item"           
##  [2] "blue_D.malic_acid_Metabolites_item"                      
##  [3] "blue_Ethanolamine_Metabolites_item"                      
##  [4] "blue_Glyceric_acid_Metabolites_item"                     
##  [5] "blue_Succinic_acid_Metabolites_item"                     
##  [6] "darkmagenta_Galactonic_acid_2_Metabolites_item"          
##  [7] "darkolivegreen_Ethanolamine_Metabolites_item"            
##  [8] "darkolivegreen_Glyceric_acid_Metabolites_item"           
##  [9] "darkorange_Beta.alanine_1_Metabolites_item"              
## [10] "darkorange_Ethanolamine_Metabolites_item"                
## [11] "darkorange2_Beta.alanine_1_Metabolites_item"             
## [12] "darkorange2_D.glucose_1_Metabolites_item"                
## [13] "darkorange2_Fructose_1_Metabolites_item"                 
## [14] "lightsteelblue1_Alpha_ketoglutaric_acid_Metabolites_item"
## [15] "lightyellow_Chlorogenic_acid_4_Metabolites_item"         
## [16] "lightyellow_D.glucose_1_Metabolites_item"                
## [17] "lightyellow_Ferulic_acid_Metabolites_item"               
## [18] "lightyellow_Galactonic_acid_2_Metabolites_item"          
## [19] "lightyellow_Glyceric_acid_Metabolites_item"              
## [20] "magenta_Beta.alanine_1_Metabolites_item"                 
## [21] "magenta_Chlorogenic_acid_3_Metabolites_item"             
## [22] "magenta_Chlorogenic_acid_4_Metabolites_item"             
## [23] "magenta_DL.isoleucine_2_Metabolites_item"                
## [24] "magenta_GABA_Metabolites_item"                           
## [25] "magenta_Galactonic_acid_2_Metabolites_item"              
## [26] "salmon_Chlorogenic_acid_3_Metabolites_item"              
## [27] "salmon_Chlorogenic_acid_4_Metabolites_item"              
## [28] "salmon_Galactonic_acid_2_Metabolites_item"               
## [29] "skyblue_Alpha_ketoglutaric_acid_Metabolites_item"        
## [30] "skyblue_Chlorogenic_acid_3_Metabolites_item"             
## [31] "skyblue_Glyceric_acid_Metabolites_item"                  
## [32] "violet_Citric_acid_Metabolites_item"

Plot MM-GS scater plot with cutoff marked (Metabolites data)

for (i in 1:length(Meta3_list)){
  which_module <- Meta3_list[i]
      ### Manual plot samples
    print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
      geom_point(size=1.5)+
      scale_colour_manual(values=c("grey60", "#DE6757")) + 
      theme_bw()+ 
      theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+  
      labs(x=paste0("Module Membership in ", Meta3_list[i] ), 
           y=paste0("Gene significance for ", Meta3_list[i]))+
      theme(axis.title.x =element_text(size=14), 
            axis.title.y=element_text(size=14),
            axis.text = element_text(size = 12),
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"),
            plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
            plot.margin = unit(rep(2,4),'lines')) +
      theme(legend.position = 'none')+
      geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
      geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5)) 
}