Sorghum Transcriptome and PTMs analysis

Profiles analysis

Visulize the heatmap using expression data and PTMs data

Load packages

library(dplyr)
library(pheatmap)
library(stringr)
library(zFPKM)
library(RColorBrewer)
library(viridis)
library(matrixStats)
library(ggplot2)
library(ggpubr)

### Library for Pearson correlation
library(Hmisc)

Filter module membership for each gene (MM)

MM_score <- read.csv("/Users/leon/Documents/Project/Sorghum/CorrelationGene_module_membership_vsd100.csv", header= T)
colnames(MM_score)[1] <- "id"
colnames(MM_score) <- str_replace(colnames(MM_score), "MM", "")
Module_color <- colnames(MM_score[,-1])

### Load ids which associated with the module color
TranscriptsID <- read.table("/Users/leon/Documents/Project/Sorghum/Transcripts.matchinglist.txt", header = T)
head(TranscriptsID, 20)

#Extracting certain module
for (i in 1:length(Module_color)){
  ID_temp <- TranscriptsID[TranscriptsID$module == Module_color[i], ]
  MM_temp <- MM_score[,c("id", Module_color[i])]
  MM_summary <- left_join(ID_temp,MM_temp, by = "id")
  colnames(MM_summary)[3] <- "MM_score"
  
  assign(paste0(Module_color[i], "_MM_summary"),MM_summary)
}

MM_list <- lapply(ls(pattern = "_MM_summary"), get)
MM_combine <- bind_rows(MM_list)

head(MM_combine, 20)

write.csv(MM_combine, "/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv")

Load TPM data and selected genes for visulizations

### Load TPM of selected genes
Zscore_vsd <- read.csv("/Users/leon/Documents/Project/Sorghum/3_Zscore_vsd_20230101.csv", header = T, row.names = 1)
head(Zscore_vsd, 10)

RMF <- read.csv("/Users/leon/Documents/Project/Sorghum/3_RMF-20230101.csv", header = T, row.names = 1)
head(RMF, 10)

Zscore_vsd_module <- left_join(MM_combine[,1:2],Zscore_vsd %>% mutate(id = rownames(Zscore_vsd)), by = "id")
rownames(Zscore_vsd_module) <- Zscore_vsd_module$id
Zscore_vsd_module <- Zscore_vsd_module[,-1]

RMF_module <- left_join(MM_combine[,1:2],RMF %>% mutate(id = rownames(RMF)), by = "id")
#RMF_module[is.na(RMF_module)] <- 0
RMF_module <- na.omit(RMF_module)
rownames(RMF_module) <- RMF_module$id
RMF_module <- RMF_module[,-1]
RMF_module <- RMF_module[,-50]

head(Zscore_vsd_module,20)

head(RMF_module,20)

Load metafeatures

sample_meta <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/12_Heatmap/Heatmap_MetaTable.csv", header = T, row.names = 1)

sample_meta

Plot heatmap based on assigned metafeature and Z-score transformed expression value

Heatmap 1 is gene expression profile Heatmap 2 is PTM profile

module_name <- Zscore_vsd_module$module %>% unique()

for (i in 1:length(module_name)){
  ### define annotation coloes
  annotation_colors = list(
    Condition = c(WL="tomato", WW="steelblue"),
    Time = c(TP1="#D8DAEB", TP3="#B2ABD2", TP5="#8073AC", TP7 ="#542788"),
    Accession = c(A="#1B9E77", B="#D95F02", C="#666666", D="#E7298A", E="#66A61E", F="#E6AB02"))

  Module_Plot1 <- Zscore_vsd_module[Zscore_vsd_module$module == module_name[i],-1]
  ### check if colnames and rownaems of annotation matched genes names and the samples namases
  all(rownames(sample_meta) %in% colnames(Module_Plot1))
  
  par(mfrow = c(1,2));
  pheatmap(Module_Plot1, annotation_col = sample_meta[,1:3],
               color = inferno(30),
               annotation_colors = annotation_colors, 
               clustering_distance_r = "correlation", 
               show_colnames =T,show_rownames = F,cluster_rows = T, cluster_cols = T,
               fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
  
  Module_Plot2 <- RMF_module[RMF_module$module == module_name[i],-1]
  ### check if colnames and rownaems of annotation matched genes names and the samples namases
  all(rownames(sample_meta) %in% colnames(Module_Plot2))
  
  pheatmap(Module_Plot2, annotation_col = sample_meta[,1:3],
               color = inferno(30),
               annotation_colors = annotation_colors, 
               clustering_distance_r = "euclidean", 
               show_colnames =T,show_rownames = F,cluster_rows = T, cluster_cols = T,
               fontsize_row = 4, fontsize_col = 4, angle_col = 90, border_color = NA)
 }

## Select genes associated with high GS-MM correlation

Load MM and GS files

LIM <- read.csv("/Users/leon/Documents/Project/Sorghum/LIM_GS.csv", header = T)
colnames(LIM)[1] = "id"
Oxidative <- read.csv("/Users/leon/Documents/Project/Sorghum/Oxidative_GS.csv", header = T)
colnames(Oxidative)[1] = "id"
Metabolite <- read.csv("/Users/leon/Documents/Project/Sorghum/Metabolites_GS.csv", header = T)
colnames(Metabolite)[1] = "id"

MM <- read.csv("/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv", 
               header = T, row.names = 1)
head(MM,20)

### lable the module information under the GS files
LIM_clean <- left_join(MM, LIM, by = "id")
Oxidative_clean <- left_join(MM, Oxidative, by = "id")
Metabolites_clean <- left_join(MM, Metabolite, by = "id")

head(LIM_clean, 20)

head(Oxidative_clean, 20)

head(Metabolites_clean, 20)

Load gene annotation information

GeneID <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/vsd100/Functional_annotation.csv",header = T)
head(GeneID, 20)

Load metatable containing the interested module-trait combinations (oxdative data)

Meta1 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_oxidative_meta.csv", header = T)
Meta1

### Create sub gene list 
for (i in 1:nrow(Meta1)){
  temp_df <- Oxidative_clean[Oxidative_clean$module == Meta1[i,1],c("id","MM_score",Meta1[i,2])]
  temp_df <- na.omit(temp_df[order(temp_df[,3]),])
  ### Filter by GS and MM
  temp_df$ABS_MM <- abs(temp_df[,2])
  temp_df$ABS_GS <- abs(temp_df[,3])
  temp_df$Group <- "FALSE"
  temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
  temp_clean <- temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,]
  temp_clean <- left_join(temp_clean,GeneID, by = "id")
  
  write.csv(temp_clean, 
            paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta1[i,1], "_",Meta1[i,2], ".csv"), quote = F, row.names = F)
  assign(paste0(Meta1[i,1], "_",Meta1[i,2], "_oxidative_item"), temp_df)
}

### Checked the merged file
head(temp_clean, 20)

### List item for further plot
Meta1_list <- ls(pattern = paste0("_oxidative_item"))
Meta1_list

##  [1] "cyan_flav_oxidative_item"             
##  [2] "cyan_Grxs_oxidative_item"             
##  [3] "cyan_polyph_oxidative_item"           
##  [4] "cyan_TAC_oxidative_item"              
##  [5] "darkgrey_Grxs_oxidative_item"         
##  [6] "darkmagenta_ascorbate_oxidative_item" 
##  [7] "darkmagenta_flav_oxidative_item"      
##  [8] "darkmagenta_polyph_oxidative_item"    
##  [9] "darkmagenta_TAC_oxidative_item"       
## [10] "ivory_APX_oxidative_item"             
## [11] "ivory_CAT_oxidative_item"             
## [12] "ivory_flav_oxidative_item"            
## [13] "ivory_HPR_oxidative_item"             
## [14] "ivory_polyph_oxidative_item"          
## [15] "ivory_pro_oxidative_item"             
## [16] "ivory_Prxs_oxidative_item"            
## [17] "ivory_TAC_oxidative_item"             
## [18] "paleturquoise_total_AA_oxidative_item"
## [19] "plum1_flav_oxidative_item"            
## [20] "salmon_flav_oxidative_item"           
## [21] "salmon_polyph_oxidative_item"         
## [22] "salmon_TAC_oxidative_item"            
## [23] "yellow_total_AA_oxidative_item"       
## [24] "yellowgreen_Frxs_oxidative_item"      
## [25] "yellowgreen_GPX_oxidative_item"       
## [26] "yellowgreen_Trxs_oxidative_item"

Plot MM-GS scater plot with cutoff marked (Osdative stress data)

for (i in 1:length(Meta1_list)){
  which_module <- Meta1_list[i]
      ### Manual plot samples
    print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
      geom_point(size=1.5)+
      scale_colour_manual(values=c("grey60", "#DE6757")) + 
      theme_bw()+ 
      theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+  
      labs(x=paste0("Module Membership in ", Meta1_list[i] ), 
           y=paste0("Gene significance for ", Meta1_list[i]))+
      theme(axis.title.x =element_text(size=14), 
            axis.title.y=element_text(size=14),
            axis.text = element_text(size = 12),
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"),
            plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
            plot.margin = unit(rep(2,4),'lines')) +
      theme(legend.position = 'none')+
      geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
      geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5)) 
}

Load metatable containing the interested module-trait combinations (LIC data)

Meta2 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_LIC_meta.csv", header = T)
Meta2

Build the LIC - module selection

### Create sub gene list 
for (i in 1:nrow(Meta2)){
  temp_df <- LIM_clean[LIM_clean$module == Meta2[i,1],c("id","MM_score",Meta2[i,2])]
  temp_df <- na.omit(temp_df[order(temp_df[,3]),])
  ### Filter by GS and MM
  temp_df$ABS_MM <- abs(temp_df[,2])
  temp_df$ABS_GS <- abs(temp_df[,3])
  temp_df$Group <- "FALSE"
  temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
  temp_clean <- temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,]
  temp_clean <- left_join(temp_clean,GeneID, by = "id")
  
  write.csv(temp_clean, 
            paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta2[i,1], "_",Meta2[i,2], ".csv"), quote = F, row.names = F)
  assign(paste0(Meta2[i,1], "_",Meta2[i,2], "_LIC_item"), temp_df)
}

### Checked the merged file
head(temp_clean, 20)

### List item for further plot
Meta2_list <- ls(pattern = paste0("_LIC_item"))
Meta2_list

##  [1] "cyan_A_LIC_item"              "cyan_Ci_LIC_item"            
##  [3] "cyan_Tair_LIC_item"           "cyan_Tleaf_LIC_item"         
##  [5] "darkmagenta_A_LIC_item"       "darkmagenta_Ci_LIC_item"     
##  [7] "darkmagenta_gsw_LIC_item"     "darkmagenta_Tair_LIC_item"   
##  [9] "darkmagenta_Tleaf_LIC_item"   "darkorange_Fo._LIC_item"     
## [11] "darkturquoise_A_LIC_item"     "grey60_A_LIC_item"           
## [13] "grey60_Fv..Fm._LIC_item"      "lightcyan_Fo._LIC_item"      
## [15] "lightyellow_Fm._LIC_item"     "lightyellow_Fv._LIC_item"    
## [17] "lightyellow_Fv..Fm._LIC_item" "magenta_Ci_LIC_item"         
## [19] "magenta_Fm._LIC_item"         "magenta_WUEi_LIC_item"       
## [21] "red_Tair_LIC_item"            "red_Tleaf_LIC_item"          
## [23] "salmon_A_LIC_item"            "salmon_Ci_LIC_item"          
## [25] "salmon_Fv..Fm._LIC_item"      "salmon_gsw_LIC_item"         
## [27] "tan_Fm._LIC_item"             "tan_Fv._LIC_item"            
## [29] "tan_Fv..Fm._LIC_item"         "violet_Fo._LIC_item"

Plot MM-GS scater plot with cutoff marked for LIC data

for (i in 1:length(Meta2_list)){
  which_module <- Meta2_list[i]
      ### Manual plot samples
    print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
      geom_point(size=1.5)+
      scale_colour_manual(values=c("grey60", "#DE6757")) + 
      theme_bw()+ 
      theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+  
      labs(x=paste0("Module Membership in ", Meta1_list[i] ), 
           y=paste0("Gene significance for ", Meta1_list[i]))+
      theme(axis.title.x =element_text(size=14), 
            axis.title.y=element_text(size=14),
            axis.text = element_text(size = 12),
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"),
            plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
            plot.margin = unit(rep(2,4),'lines')) +
      theme(legend.position = 'none')+
      geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
      geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5)) 
}

Interested module-trait combinations (metabolites data)

Meta3 <- read.csv("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/MM_Metabolites_meta.csv", header = T)
Meta3

Build the metabolites - module selection

### Create sub gene list 
for (i in 1:nrow(Meta3)){
  temp.df <- Metabolites_clean[Metabolites_clean$module == Meta3[i,1],c("id","MM_score",Meta3[i,2])]
  temp_df <- na.omit(temp.df[order(temp.df[,3]),])
  ### Filter by GS and MM
  temp_df$ABS_MM <- abs(temp_df[,2])
  temp_df$ABS_GS <- abs(temp_df[,3])
  temp_df$Group <- "FALSE"
  temp_df[temp_df$ABS_MM > 0.7 & temp_df$ABS_GS > 0.6,"Group"] <- "TRUE"
  temp_clean <- temp_df[temp_df$ABS_MM > 0.6 & temp_df$ABS_GS > 0.5,]
  temp_clean <- left_join(temp_clean,GeneID, by = "id")
  
  write.csv(temp_clean, paste0("/Users/Leon/OneDrive - Cornell University/Projects/9_Sorghum/7_WGCNA/GS_MM_selection/Results/",Meta3[i,1], "_",Meta3[i,2], ".csv"), quote = F, row.names = F)
  assign(paste0(Meta3[i,1], "_",Meta3[i,2], "_Metabolites_item"), temp_df)
}

### Checked the merged file
head(temp_clean, 20)

### List item for further plot
Meta3_list <- ls(pattern = paste0("_Metabolites_item"))
Meta3_list

##  [1] "blue_Alpha_ketoglutaric_acid_Metabolites_item"           
##  [2] "blue_D.malic_acid_Metabolites_item"                      
##  [3] "blue_Ethanolamine_Metabolites_item"                      
##  [4] "blue_Glyceric_acid_Metabolites_item"                     
##  [5] "blue_Succinic_acid_Metabolites_item"                     
##  [6] "darkmagenta_Galactonic_acid_2_Metabolites_item"          
##  [7] "darkolivegreen_Ethanolamine_Metabolites_item"            
##  [8] "darkolivegreen_Glyceric_acid_Metabolites_item"           
##  [9] "darkorange_Beta.alanine_1_Metabolites_item"              
## [10] "darkorange_Ethanolamine_Metabolites_item"                
## [11] "darkorange2_Beta.alanine_1_Metabolites_item"             
## [12] "darkorange2_D.glucose_1_Metabolites_item"                
## [13] "darkorange2_Fructose_1_Metabolites_item"                 
## [14] "lightsteelblue1_Alpha_ketoglutaric_acid_Metabolites_item"
## [15] "lightyellow_Chlorogenic_acid_4_Metabolites_item"         
## [16] "lightyellow_D.glucose_1_Metabolites_item"                
## [17] "lightyellow_Ferulic_acid_Metabolites_item"               
## [18] "lightyellow_Galactonic_acid_2_Metabolites_item"          
## [19] "lightyellow_Glyceric_acid_Metabolites_item"              
## [20] "magenta_Beta.alanine_1_Metabolites_item"                 
## [21] "magenta_Chlorogenic_acid_3_Metabolites_item"             
## [22] "magenta_Chlorogenic_acid_4_Metabolites_item"             
## [23] "magenta_DL.isoleucine_2_Metabolites_item"                
## [24] "magenta_GABA_Metabolites_item"                           
## [25] "magenta_Galactonic_acid_2_Metabolites_item"              
## [26] "salmon_Chlorogenic_acid_3_Metabolites_item"              
## [27] "salmon_Chlorogenic_acid_4_Metabolites_item"              
## [28] "salmon_Galactonic_acid_2_Metabolites_item"               
## [29] "skyblue_Alpha_ketoglutaric_acid_Metabolites_item"        
## [30] "skyblue_Chlorogenic_acid_3_Metabolites_item"             
## [31] "skyblue_Glyceric_acid_Metabolites_item"                  
## [32] "violet_Citric_acid_Metabolites_item"

Plot MM-GS scater plot with cutoff marked (Metabolites data)

for (i in 1:length(Meta3_list)){
  which_module <- Meta3_list[i]
      ### Manual plot samples
    print(ggplot(data=get(which_module), aes(x=ABS_MM, y=ABS_GS, color=Group)) +
      geom_point(size=1.5)+
      scale_colour_manual(values=c("grey60", "#DE6757")) + 
      theme_bw()+ 
      theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+  
      labs(x=paste0("Module Membership in ", Meta3_list[i] ), 
           y=paste0("Gene significance for ", Meta3_list[i]))+
      theme(axis.title.x =element_text(size=14), 
            axis.title.y=element_text(size=14),
            axis.text = element_text(size = 12),
            axis.text.x = element_text(colour = "black"),
            axis.text.y = element_text(colour = "black"),
            plot.title = element_text(hjust = 0.5,size = 16,face = "bold"),
            plot.margin = unit(rep(2,4),'lines')) +
      theme(legend.position = 'none')+
      geom_hline(aes(yintercept=0.6),colour="#5B9BD5",lwd=1,linetype=5)+
      geom_vline(aes(xintercept=0.7),colour="#5B9BD5",lwd=1,linetype=5)) 
}

Sorghum Transcriptome and PTMs analysis

Liang Yu, Andrew Nelson

2022/8/25

Profiles analysis

Visulize the heatmap using expression data and PTMs data

Load packages

Filter module membership for each gene (MM)

Load TPM data and selected genes for visulizations

Load metafeatures

Plot heatmap based on assigned metafeature and Z-score transformed expression value

Load MM and GS files

Load gene annotation information

Load metatable containing the interested module-trait combinations (oxdative data)

Plot MM-GS scater plot with cutoff marked (Osdative stress data)

Load metatable containing the interested module-trait combinations (LIC data)

Build the LIC - module selection

Plot MM-GS scater plot with cutoff marked for LIC data

Interested module-trait combinations (metabolites data)

Build the metabolites - module selection

Plot MM-GS scater plot with cutoff marked (Metabolites data)