Co-expression network construction (WGCNA)

Prior installation and configuration step

install.packages("WGCNA")
install.packages(c("matrixStats", "Hmisc", "splines", "foreach", "doParallel", "fastcluster", "dynamicTreeCut", "survival"))

if (!require("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install(version = "3.14")
BiocManager::install(c("GO.db", "preprocessCore", "impute"))
install.packages("Rcpp")
install.packages("xfun")

Load libraries

library(WGCNA)
library(cluster)
library(dplyr)
library(pheatmap)
library(stringr)
library(RColorBrewer)
library(viridis)
library(matrixStats)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(cowplot)
library(knitr)

# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
enableWGCNAThreads(nThreads = 10)
## Allowing parallel execution with up to 10 working processes.

Load envrioment

NOTE: This step will be executed only if you’ve saved working.RData file, otherwise should just skip

load("/Users/leon/Documents/Project/Sorghum/Sorghum_Range28.RData")

STEP 1.1 Load expression data and meta-table

#Load expression
TPM <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_20220629_clean_vsd_update96.csv", header = T)
#rename the column name
rownames(TPM) <- TPM$Name
TPM <- TPM[,-1]

#Reorder samples orders by colnames
TPM <- TPM[ , order(names(TPM))]
head(TPM)

STEP 1.2 Calculate the average numbers for each two replicates

TPM_clean <- data.frame(matrix(ncol = 48, nrow = nrow(TPM)))

Trait <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_oxidative_stress-Feb2022.csv", 
                  header = T, row.names = 1)

vector <- unique(Trait$ID)
rownames(TPM_clean) <- rownames(TPM)
colnames(TPM_clean) <- vector

#Loop calculation for each accesion average
for (i in 1:length(vector)){
  TPM_clean[,grepl(vector[i],colnames(TPM_clean))] <- rowMeans(TPM[,grepl(vector[i],colnames(TPM))])
}

head(TPM_clean)

STEP 1.3 Load Phenotype information

#load oxdative stress table for average
Trait <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_oxidative_stress-Feb2022.csv", 
                  header = T, row.names = 1)
datTrait <- Trait[, 6:34]

datTrait_ave <- data.frame(matrix(ncol = 29, nrow = 48))
vector <- unique(Trait$ID)
colnames(datTrait_ave) <- colnames(datTrait)
rownames(datTrait_ave) <- vector
### Calculate the average by loop
for (i in 1:length(vector)){
  datTrait_ave[grepl(vector[i],rownames(datTrait_ave)),] <- colMeans(datTrait[grepl(vector[i],rownames(datTrait)),])
}

datTrait 
#load LI_6800 table for average (morning data)
LI1 <- read.csv("/Users/leon/Documents/Project/Sorghum/LI6800_clean-20220324-morning.csv", 
                  header = T, row.names = 1)
dat_LI1 <- LI1[, 7:18]

LI1_ave <- data.frame(matrix(ncol = 12, nrow = 48))
vector1 <- unique(LI1$ID)
colnames(LI1_ave) <- colnames(dat_LI1 )
rownames(LI1_ave) <- vector1

### Calculate the average by loop
for (i in 1:length(vector1)){
  LI1_ave[grepl(vector1[i],rownames(LI1_ave)),] <- colMeans(dat_LI1[grepl(vector1[i],rownames(dat_LI1)),])
}

LI1_ave
#load LI_6800 table for average (afternoon data)
LI2 <- read.csv("/Users/leon/Documents/Project/Sorghum/LI6800_clean-20220324-afternoon.csv", 
                  header = T, row.names = 1)
dat_LI2 <- LI2[, 7:18]

LI2_ave <- data.frame(matrix(ncol = 12, nrow = 48))
vector2 <- unique(LI2$ID)
colnames(LI2_ave) <- colnames(dat_LI2)
rownames(LI2_ave) <- vector2

### Calculate the average by loop
for (i in 1:length(vector2)){
  LI2_ave[grepl(vector2[i],rownames(LI2_ave)),] <- colMeans(dat_LI2[grepl(vector2[i],rownames(dat_LI2)),])
}

LI2_ave
#load metabolites data for average
Metabolites <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_Metabolites-Feb2023.csv", 
                  header = T, row.names = 1)
dat_Metabolites <-Metabolites[, 6:48]

Metabolites_ave <- data.frame(matrix(ncol = 43, nrow = 48))
vector3 <- unique(Metabolites$ID)
colnames(Metabolites_ave) <- colnames(dat_Metabolites)
rownames(Metabolites_ave) <- vector3

### Calculate the average by loop
for (i in 1:length(vector3)){
  Metabolites_ave[grepl(vector3[i],rownames(Metabolites_ave)),] <- colMeans(dat_Metabolites[grepl(vector3[i],rownames(dat_Metabolites)),])
}

Metabolites_ave

STEP 1. Load genes with high MAD score

datExpr <- as.data.frame(t(TPM_clean[order(apply(TPM_clean,1,mad), decreasing = T)[1:22314],]))
#write.csv(data.frame(transcripts_ID = colnames(datExpr)),"/home/liangyu/1_Project/8_Sorghum/3_WGCNA/MAD85_TranscriptsID.csv", quote=F, row.names = F)

### Transform TPM into Z-score
scale_rows = function(x){
    m = apply(x, 1, mean, na.rm = T)
    s = apply(x, 1, sd, na.rm = T)
    return((x - m) / s)
}

Zscore_vsd<- scale_rows(as.data.frame(t(datExpr)))
head(Zscore_vsd,100)
write.csv(Zscore_vsd, "/Users/leon/Documents/Project/Sorghum/Zscore_vsd_20230101.csv", quote = F)

STEP 2. Calculate the soft thresholding power (beta)

enableWGCNAThreads(nThreads = 10)

powers = c(c(1:10), seq(from = 12, to=30, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Pring estimated power
sft$powerEstimate
k <- softConnectivity(datE=datExpr,power=sft$powerEstimate) 
# Plot the results:
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

par(mfrow = c(1,2));
hist(k)
scaleFreePlot(k,main="Check Scale free topology\n")

STEP3.1. Step-by-step network construction and module detection

#Co-expression similarity and adjacency
softPower = sft$powerEstimate;
adjacency = adjacency(datExpr, power = softPower)

# Turn adjacency into topological overlap
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM

# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average");
# Plot the dendrogram and colors underneath
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")

STEP 3.2 Merging of modules whose expression profiles are very similar

# Calculate eigengenes
MEList = moduleEigengenes(datExpr, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
### Chose the cutoff for tree height
### NOTE: Corrssponded to the genes with 0.8 corelation coefficient will be merged into one module

MEDissThres = 0.25

# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
##  mergeCloseModules: Merging modules whose distance is less than 0.25
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 46 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 37 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 36 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 36 module eigengenes in given set.
# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;

### Plot the final modules been merged 
#sizeGrWindow(12, 9)
#pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

### Export the count data
Module_count <- as.data.frame(table(mergedColors))
write.csv(Module_count, "/Users/leon/Documents/Project/Sorghum/module-counts.csv")

Module_count

STEP 4. Plot clustereing among samples

#check the sample names and gene names
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

#cluster tress (check if any outliers existed among samples)
datExpr_tree <- hclust(dist(datExpr), method = "average")
par(mar = c(0,5,2,0))
plot(datExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2, 
     cex.axis = 1, cex.main = 1,cex.lab=1)

STEP 6 Compare the epigengene values across samples from each module

#Build the epignegene value matrix for plot
MEs0 <- moduleEigengenes(datExpr, mergedColors)$eigengenes
MEs0$Sample <- rownames(MEs0)
MEs0$Name <- rownames(MEs0)
MEs <- separate(MEs0, Name, c("Accession", "Condition", "Time"))


### Plot Eigengene value for each module in a line graph mode
for (i in 1:nrow(Module_count)){
  plot1 <- ggplot(MEs[MEs$Condition =="WL",],aes(x = Time,y = get(paste0("ME", Module_count[i,1])), color = Accession)) +
  geom_point(size = 1) +
  geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +
  theme_bw(base_size = 10) +
  theme(axis.text.x=element_text(angle=90)) + ylab(paste0("WL Eigengene value of ", Module_count[i,1]))

  plot2 <- ggplot(MEs[MEs$Condition =="WW",],aes(x = Time,y = get(paste0("ME", Module_count[i,1])), color = Accession)) +
  geom_point(size = 1) +
  geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +
  theme_bw(base_size = 10) +
  theme(axis.text.x=element_text(angle=90)) + ylab(paste0("WW Eigengene value of ", Module_count[i,1]))
  
  ###  combine plot and export into pdf
  #pdf(paste0("/mnt/Knives/1_data/Sorghum_WGCNA/Results/vsd100/Eigene-value/eigengene_", Module_count[i,1], ".pdf", sep = ""), width = 8, height = 4)
  print(plot_grid(plot1, plot2, labels = c('A', 'B'), label_size = 12))
  #dev.off()
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

STEP 7. Module-trait membership correlation

STEP 7.1

nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

moduleColors <- mergedColors
MEs0 <- moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs <- orderMEs(MEs0)

corType = "pearson"
robustY = ifelse(corType=="pearson",T,F)

STEP 7.2 Build correlation between oxdative status data and expression module

##Build correlation between oxdative status data and expression module
if (corType=="pearson") {
  modOXCor = cor(MEs, datTrait_ave[,colnames(datTrait_ave)[1:29]], use = "p")
  modOXP = corPvalueStudent(modOXCor, nSamples)
} else {
  modOXCorP = bicorAndPvalue(MEs_col, colnames(datTrait_ave[1:29]) , robustY=robustY)
  modOXCor = modOXCorP$bicor
  modOXP   = modOXCorP$p
}

textMatrix1 = paste(signif(modOXCor, 2), "\n(", signif(modOXP, 1), ")", sep = "")
dim(textMatrix1) = dim(modOXCor)

modOXCor_results <- as.data.frame(modOXCor)
modOXP_results <- as.data.frame(modOXP)

min(modOXCor_results)
## [1] -0.7315044
max(modOXCor_results)
## [1] 0.7610047
write.csv(modOXCor_results,
          "/Users/leon/Documents/Project/Sorghum/2_Oxdative_vsd100_Correlation.csv", quote = F)
modOXCor_results
write.csv(modOXP_results,
          "/Users/leon/Documents/Project/Sorghum/2_Oxdative_vsd100_Pvalue.csv", quote = F)
modOXP_results
### Plot the correlation heatmap
labeledHeatmap(Matrix = modOXCor, xLabels = colnames(datTrait_ave[1:29]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix1, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module - Oxdative-Stress data relationships"))

STEP 7.3 Build correlation between LI6800 data and expression module (morning data/afternoon data)

###Build correlation between LI6800 data and expression module (morning)
if (corType=="pearson") {
  modLI1Cor = cor(MEs, LI1_ave[,colnames(LI1_ave)[1:12]], use = "p")
  modLI1P = corPvalueStudent(modLI1Cor, nSamples)
} else {
  modLI1CorP = bicorAndPvalue(MEs_col, colnames(LI1_ave[1:12]) , robustY=robustY)
  modLI1Cor = modLI1CorP$bicor
  modLI1P   = modLI1CorP$p
}

textMatrix2 = paste(signif(modLI1Cor, 2), "\n(", signif(modLI1P, 1), ")", sep = "")
dim(textMatrix2) = dim(modLI1Cor)

modLI1Cor_results <- as.data.frame(modLI1Cor)
modLI1P_results <- as.data.frame(modLI1P)
min(modLI1Cor_results)
## [1] -0.7147423
max(modLI1Cor_results)
## [1] 0.7285545
write.csv(modLI1Cor_results, "/Users/leon/Documents/Project/Sorghum/2_LIM_vsd100_correlation.csv", quote = F)
write.csv(modLI1P_results, "/Users/leon/Documents/Project/Sorghum/2_LIM_vsd100_Pvalue.csv", quote = F)

modLI1Cor_results
modLI1P_results
### Plot the correlation heatmap
labeledHeatmap(Matrix = modLI1Cor, xLabels = colnames(LI1_ave[1:12]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix2, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module-LI6800 data relationships (Morning)"))

###Build correlation between LI6800 data and expression module (afternoon)
if (corType=="pearson") {
  modLI2Cor = cor(MEs, LI2_ave[,colnames(LI2_ave)[1:12]], use = "p")
  modLI2P = corPvalueStudent(modLI2Cor, nSamples)
} else {
  modLI2CorP = bicorAndPvalue(MEs_col, colnames(LI2_ave[1:12]) , robustY=robustY)
  modLI2Cor = modLI2CorP$bicor
  modLI2P  = modLI2CorP$p
}

textMatrix3 = paste(signif(modLI2Cor, 2), "\n(", signif(modLI2P, 1), ")", sep = "")
dim(textMatrix3) = dim(modLI2Cor)

modLI2Cor_results <- as.data.frame(modLI2Cor)
modLI2P_results <- as.data.frame(modLI2P)
min(modLI2Cor_results)
## [1] -0.710625
max(modLI2Cor_results)
## [1] 0.6827603
write.csv(modLI2Cor_results, "/Users/leon/Documents/Project/Sorghum/2_LIA_vsd100_correlation.csv", quote = F)
write.csv(modLI2P_results, "/Users/leon/Documents/Project/Sorghum/2_LIA_vsd100_Pvalue.csv", quote = F)

modLI2Cor_results
modLI2P_results
### Plot the correlation heatmap
labeledHeatmap(Matrix =  modLI2Cor, xLabels = colnames(LI2_ave[1:12]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix3, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module-LI6800 data relationships (Afternoon)"))

###Build correlation between metabolites and gene expression
if (corType=="pearson") {
  modMetCor = cor(MEs, Metabolites_ave[,colnames(Metabolites_ave)[1:43]], use = "p")
  modMetP = corPvalueStudent(modMetCor, nSamples)
} else {
  modMetCorP = bicorAndPvalue(MEs_col, colnames(Metabolites_ave[1:43]) , robustY=robustY)
  modMetCor = modMetCorP$bicor
  modMetP  = modMetCorP$p
}

textMatrix4 = paste(signif(modMetCor, 2), "\n(", signif(modMetP, 1), ")", sep = "")
dim(textMatrix4) = dim(modMetCor)

modMetCor_results <- as.data.frame(modMetCor)
modMetP_results <- as.data.frame(modMetP)
min(modMetCor_results)
## [1] -0.6367811
max(modMetCor_results)
## [1] 0.7962088
write.csv(modMetCor_results, "/Users/leon/Documents/Project/Sorghum/2_Metabolites_vsd100_correlation.csv", quote = F)
write.csv(modMetP_results, "/Users/leon/Documents/Project/Sorghum/2_Metabolites_vsd100_Pvalue.csv", quote = F)

modMetCor_results
modMetP_results
### Plot the correlation heatmap
labeledHeatmap(Matrix =  modMetCor, xLabels = colnames(Metabolites_ave[1:43]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix4, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module-metabolites data correlations (Afternoon)"))

Calculate the gene-module membership (MM)

# names (colors) of the modules
modNames <- substring(names(MEs), 3)
geneModuleMembership <- as.data.frame(cor(datExpr, MEs, use = "p"))

MMPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))

names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")

#Export the gene-module membership (MM table includes all modules)
write.csv(geneModuleMembership, "/Users/leon/Documents/Project/Sorghum/CorrelationGene_module_membership_vsd100.csv")
geneModuleMembership

GS_MM based on particular trait

### extract the GS of oxidative stress phenotype
Oxidative_list <- colnames(datTrait_ave)
Oxidative_GS <- data.frame(matrix(ncol = length(Oxidative_list) , nrow = ncol(datExpr)))

for (i in 1:length(Oxidative_list)){
  metab <- datTrait_ave[,colnames(datTrait_ave) == Oxidative_list[i]]
  Oxidative_GS[,i] <- as.data.frame(cor(datExpr, metab, use = "p"))
  colnames(Oxidative_GS)[i] <- Oxidative_list[i]
  rownames(Oxidative_GS) <- colnames(datExpr)
}

### export phenotype data
write.csv(Oxidative_GS, "/Users/leon/Documents/Project/Sorghum/Oxidative_GS.csv", quote = F)
head(Oxidative_GS, 50)
### extract the GS of LIC6800 phenotype
LIM_list <- colnames(LI1_ave)
LIM_GS <- data.frame(matrix(ncol = length(LIM_list) , nrow = ncol(datExpr)))

for (i in 1:length(LIM_list)){
  metab <- LI1_ave[,colnames(LI1_ave) == LIM_list[i]]
  LIM_GS[,i] <- as.data.frame(cor(datExpr, metab, use = "p"))
  colnames(LIM_GS)[i] <- LIM_list[i]
  rownames(LIM_GS) <- colnames(datExpr)
}

### export phenotype data
write.csv(LIM_GS, "/Users/leon/Documents/Project/Sorghum/LIM_GS.csv", quote = F)
head(LIM_GS, 50)
### extract the GS of LIC6800 phenotype
Metabolites_list <- colnames(Metabolites_ave)
Metabolites_GS <- data.frame(matrix(ncol = length(Metabolites_list) , nrow = ncol(datExpr)))

for (i in 1:length(Metabolites_list)){
  metab <- Metabolites_ave[,colnames(Metabolites_ave) == Metabolites_list[i]]
  Metabolites_GS[,i] <- as.data.frame(cor(datExpr, metab, use = "p"))
  colnames(Metabolites_GS)[i] <- Metabolites_list[i]
  rownames(Metabolites_GS) <- colnames(datExpr)
}

### export phenotype data
write.csv(Metabolites_GS, "/Users/leon/Documents/Project/Sorghum/Metabolites_GS.csv", quote = F)
head(Metabolites_GS, 50)

Export network for cytoscape downstream visulization

for (i in 1:nrow(Module_count)){
  # Select module
  module <- Module_count[i,1]
  # Select module probes
  probes = colnames(datExpr)
  moduleColors = mergedColors
  inModule = (moduleColors==module);
  modProbes = probes[inModule];
  # Select the corresponding Topological Overlap
  modTOM <- TOM[inModule, inModule]
  dimnames(modTOM) <- list(modProbes, modProbes)
  
  cyt <- exportNetworkToCytoscape(
  modTOM,
  edgeFile = 
  paste0("/Users/leon/Documents/Project/Sorghum/Edge/edges-vsd100", paste(module), ".csv"),
  nodeFile = 
  paste0("/Users/leon/Documents/Project/Sorghum/Node/nodes-vsd100", paste(module), ".csv"),
  weighted = TRUE,
  threshold = 0,
  nodeNames = modProbes, 
  nodeAttr = moduleColors[inModule])
}

Filter weight of network using cutoff

sample.list <- read.table("/Users/leon/Documents/Project/Sorghum/sample.list", header = F)
sample.list 
for (i in 1:nrow(sample.list)){
  sample.df <- read.csv(paste0("/Users/leon/Documents/Project/Sorghum/Edge/",
                               sample.list[i,1]), header = T, sep = "\t")
  ### sort weight data from largest to smallest
  Edge_sort <- sample.df[order(-sample.df$weight),]
  
  ### Take the 10% top edge connections
  Edge_clean <- unique(Edge_sort[1: nrow(Edge_sort)*0.1,])
  write.csv(Edge_clean, paste0("/Users/leon/Documents/Project/Sorghum/Clean_Edge/",sample.list[i,1]), row.names = F, quote = F)
  
  Node1 <- Edge_clean[,1, drop = F]
  Node2 <- Edge_clean[,2, drop = F]
  colnames(Node1) = "Transcripts"
  colnames(Node2) = "Transcripts"
  
  ### Build non-duplciated transcripts list 
  temp <- unique(bind_rows(Node1, Node2))
  Edge_count <- data.frame(table(bind_rows(Node1, Node2)$Transcripts))
  
  Node_clean <- data.frame(Node =  temp$Transcripts, Module = sample.list[i,1])
  Node_clean$Module <- str_replace(Node_clean$Module, "edges-vsd100", "") 
  Node_clean$Module <- str_replace(Node_clean$Module, ".csv", "")
  
  write.csv(Node_clean, paste0("/Users/leon/Documents/Project/Sorghum/Clean_node/",sample.list[i,1]), row.names = F, quote = F)
  write.csv(Edge_count, paste0("/Users/leon/Documents/Project/Sorghum/Edge_count/",sample.list[i,1]), row.names = F, quote = F)
  
}

### example files
head(Node_clean)
head(Edge_count)
head(Edge_clean)

Filter module membership for each gene (MM)

MM_score <- read.csv("/Users/leon/Documents/Project/Sorghum/CorrelationGene_module_membership_vsd100.csv", header= T)
colnames(MM_score)[1] <- "id"
colnames(MM_score) <- str_replace(colnames(MM_score), "MM", "")
Module_color <- colnames(MM_score[,-1])

### Load ids which associated with the module color
TranscriptsID <- read.table("/Users/leon/Documents/Project/Sorghum/Transcripts.matchinglist.txt", header = T)
head(TranscriptsID, 20)
#Extracting certain module
for (i in 1:length(Module_color)){
  ID_temp <- TranscriptsID[TranscriptsID$module == Module_color[i], ]
  MM_temp <- MM_score[,c("id", Module_color[i])]
  MM_summary <- left_join(ID_temp,MM_temp, by = "id")
  colnames(MM_summary)[3] <- "MM_score"
  
  assign(paste0(Module_color[i], "_MM_summary"),MM_summary)
}

MM_list <- lapply(ls(pattern = "_MM_summary"), get)
MM_combine <- bind_rows(MM_list)

head(MM_combine, 20) 
write.csv(MM_combine, "/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv")