Sorghum WGCNA and module-trait correlated genes

Co-expression network construction (WGCNA)

Prior installation and configuration step

install.packages("WGCNA")
install.packages(c("matrixStats", "Hmisc", "splines", "foreach", "doParallel", "fastcluster", "dynamicTreeCut", "survival"))

if (!require("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install(version = "3.14")
BiocManager::install(c("GO.db", "preprocessCore", "impute"))
install.packages("Rcpp")
install.packages("xfun")

Load libraries

library(WGCNA)
library(cluster)
library(dplyr)
library(pheatmap)
library(stringr)
library(RColorBrewer)
library(viridis)
library(matrixStats)
library(ggplot2)
library(ggpubr)
library(tidyr)
library(cowplot)
library(knitr)

# The following setting is important, do not omit.
options(stringsAsFactors = FALSE)
enableWGCNAThreads(nThreads = 10)

## Allowing parallel execution with up to 10 working processes.

Load envrioment

NOTE: This step will be executed only if you’ve saved working.RData file, otherwise should just skip

load("/Users/leon/Documents/Project/Sorghum/Sorghum_Range28.RData")

STEP 1.1 Load expression data and meta-table

#Load expression
TPM <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_20220629_clean_vsd_update96.csv", header = T)
#rename the column name
rownames(TPM) <- TPM$Name
TPM <- TPM[,-1]

#Reorder samples orders by colnames
TPM <- TPM[ , order(names(TPM))]
head(TPM)

STEP 1.2 Calculate the average numbers for each two replicates

TPM_clean <- data.frame(matrix(ncol = 48, nrow = nrow(TPM)))

Trait <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_oxidative_stress-Feb2022.csv", 
                  header = T, row.names = 1)

vector <- unique(Trait$ID)
rownames(TPM_clean) <- rownames(TPM)
colnames(TPM_clean) <- vector

#Loop calculation for each accesion average
for (i in 1:length(vector)){
  TPM_clean[,grepl(vector[i],colnames(TPM_clean))] <- rowMeans(TPM[,grepl(vector[i],colnames(TPM))])
}

head(TPM_clean)

STEP 1.3 Load Phenotype information

#load oxdative stress table for average
Trait <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_oxidative_stress-Feb2022.csv", 
                  header = T, row.names = 1)
datTrait <- Trait[, 6:34]

datTrait_ave <- data.frame(matrix(ncol = 29, nrow = 48))
vector <- unique(Trait$ID)
colnames(datTrait_ave) <- colnames(datTrait)
rownames(datTrait_ave) <- vector
### Calculate the average by loop
for (i in 1:length(vector)){
  datTrait_ave[grepl(vector[i],rownames(datTrait_ave)),] <- colMeans(datTrait[grepl(vector[i],rownames(datTrait)),])
}

datTrait

#load LI_6800 table for average (morning data)
LI1 <- read.csv("/Users/leon/Documents/Project/Sorghum/LI6800_clean-20220324-morning.csv", 
                  header = T, row.names = 1)
dat_LI1 <- LI1[, 7:18]

LI1_ave <- data.frame(matrix(ncol = 12, nrow = 48))
vector1 <- unique(LI1$ID)
colnames(LI1_ave) <- colnames(dat_LI1 )
rownames(LI1_ave) <- vector1

### Calculate the average by loop
for (i in 1:length(vector1)){
  LI1_ave[grepl(vector1[i],rownames(LI1_ave)),] <- colMeans(dat_LI1[grepl(vector1[i],rownames(dat_LI1)),])
}

LI1_ave

#load LI_6800 table for average (afternoon data)
LI2 <- read.csv("/Users/leon/Documents/Project/Sorghum/LI6800_clean-20220324-afternoon.csv", 
                  header = T, row.names = 1)
dat_LI2 <- LI2[, 7:18]

LI2_ave <- data.frame(matrix(ncol = 12, nrow = 48))
vector2 <- unique(LI2$ID)
colnames(LI2_ave) <- colnames(dat_LI2)
rownames(LI2_ave) <- vector2

### Calculate the average by loop
for (i in 1:length(vector2)){
  LI2_ave[grepl(vector2[i],rownames(LI2_ave)),] <- colMeans(dat_LI2[grepl(vector2[i],rownames(dat_LI2)),])
}

LI2_ave

#load metabolites data for average
Metabolites <- read.csv("/Users/leon/Documents/Project/Sorghum/Sorghum_Metabolites-Feb2023.csv", 
                  header = T, row.names = 1)
dat_Metabolites <-Metabolites[, 6:48]

Metabolites_ave <- data.frame(matrix(ncol = 43, nrow = 48))
vector3 <- unique(Metabolites$ID)
colnames(Metabolites_ave) <- colnames(dat_Metabolites)
rownames(Metabolites_ave) <- vector3

### Calculate the average by loop
for (i in 1:length(vector3)){
  Metabolites_ave[grepl(vector3[i],rownames(Metabolites_ave)),] <- colMeans(dat_Metabolites[grepl(vector3[i],rownames(dat_Metabolites)),])
}

Metabolites_ave

STEP 1. Load genes with high MAD score

datExpr <- as.data.frame(t(TPM_clean[order(apply(TPM_clean,1,mad), decreasing = T)[1:22314],]))
#write.csv(data.frame(transcripts_ID = colnames(datExpr)),"/home/liangyu/1_Project/8_Sorghum/3_WGCNA/MAD85_TranscriptsID.csv", quote=F, row.names = F)

### Transform TPM into Z-score
scale_rows = function(x){
    m = apply(x, 1, mean, na.rm = T)
    s = apply(x, 1, sd, na.rm = T)
    return((x - m) / s)
}

Zscore_vsd<- scale_rows(as.data.frame(t(datExpr)))
head(Zscore_vsd,100)

write.csv(Zscore_vsd, "/Users/leon/Documents/Project/Sorghum/Zscore_vsd_20230101.csv", quote = F)

STEP 2. Calculate the soft thresholding power (beta)

enableWGCNAThreads(nThreads = 10)

powers = c(c(1:10), seq(from = 12, to=30, by=2))
# Call the network topology analysis function
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Pring estimated power
sft$powerEstimate
k <- softConnectivity(datE=datExpr,power=sft$powerEstimate)

# Plot the results:
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
     main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
     labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
     xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
     main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

par(mfrow = c(1,2));
hist(k)
scaleFreePlot(k,main="Check Scale free topology\n")

STEP3.1. Step-by-step network construction and module detection

#Co-expression similarity and adjacency
softPower = sft$powerEstimate;
adjacency = adjacency(datExpr, power = softPower)

# Turn adjacency into topological overlap
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM

# Call the hierarchical clustering function
geneTree = hclust(as.dist(dissTOM), method = "average");

# Plot the dendrogram and colors underneath
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")

STEP 3.2 Merging of modules whose expression profiles are very similar

# Calculate eigengenes
MEList = moduleEigengenes(datExpr, colors = dynamicColors)
MEs = MEList$eigengenes
# Calculate dissimilarity of module eigengenes
MEDiss = 1-cor(MEs);
# Cluster module eigengenes
METree = hclust(as.dist(MEDiss), method = "average");
### Chose the cutoff for tree height
### NOTE: Corrssponded to the genes with 0.8 corelation coefficient will be merged into one module

MEDissThres = 0.25

# Call an automatic merging function
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)

##  mergeCloseModules: Merging modules whose distance is less than 0.25
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 46 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 37 module eigengenes in given set.
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 36 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 36 module eigengenes in given set.

# The merged module colors
mergedColors = merge$colors;
# Eigengenes of the new merged modules:
mergedMEs = merge$newMEs;

### Plot the final modules been merged 
#sizeGrWindow(12, 9)
#pdf(file = "Plots/geneDendro-3.pdf", wi = 9, he = 6)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)

### Export the count data
Module_count <- as.data.frame(table(mergedColors))
write.csv(Module_count, "/Users/leon/Documents/Project/Sorghum/module-counts.csv")

Module_count

STEP 4. Plot clustereing among samples

#check the sample names and gene names
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

#cluster tress (check if any outliers existed among samples)
datExpr_tree <- hclust(dist(datExpr), method = "average")
par(mar = c(0,5,2,0))
plot(datExpr_tree, main = "Sample clustering", sub="", xlab="", cex.lab = 2, 
     cex.axis = 1, cex.main = 1,cex.lab=1)

STEP 6 Compare the epigengene values across samples from each module

#Build the epignegene value matrix for plot
MEs0 <- moduleEigengenes(datExpr, mergedColors)$eigengenes
MEs0$Sample <- rownames(MEs0)
MEs0$Name <- rownames(MEs0)
MEs <- separate(MEs0, Name, c("Accession", "Condition", "Time"))


### Plot Eigengene value for each module in a line graph mode
for (i in 1:nrow(Module_count)){
  plot1 <- ggplot(MEs[MEs$Condition =="WL",],aes(x = Time,y = get(paste0("ME", Module_count[i,1])), color = Accession)) +
  geom_point(size = 1) +
  geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +
  theme_bw(base_size = 10) +
  theme(axis.text.x=element_text(angle=90)) + ylab(paste0("WL Eigengene value of ", Module_count[i,1]))

  plot2 <- ggplot(MEs[MEs$Condition =="WW",],aes(x = Time,y = get(paste0("ME", Module_count[i,1])), color = Accession)) +
  geom_point(size = 1) +
  geom_line(alpha = 0.3,size = 1, aes(group= Accession)) +
  theme_bw(base_size = 10) +
  theme(axis.text.x=element_text(angle=90)) + ylab(paste0("WW Eigengene value of ", Module_count[i,1]))
  
  ###  combine plot and export into pdf
  #pdf(paste0("/mnt/Knives/1_data/Sorghum_WGCNA/Results/vsd100/Eigene-value/eigengene_", Module_count[i,1], ".pdf", sep = ""), width = 8, height = 4)
  print(plot_grid(plot1, plot2, labels = c('A', 'B'), label_size = 12))
  #dev.off()
}

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

STEP 7. Module-trait membership correlation

STEP 7.1

nGenes = ncol(datExpr)
nSamples = nrow(datExpr)

moduleColors <- mergedColors
MEs0 <- moduleEigengenes(datExpr, moduleColors)$eigengenes
MEs <- orderMEs(MEs0)

corType = "pearson"
robustY = ifelse(corType=="pearson",T,F)

STEP 7.2 Build correlation between oxdative status data and expression module

##Build correlation between oxdative status data and expression module
if (corType=="pearson") {
  modOXCor = cor(MEs, datTrait_ave[,colnames(datTrait_ave)[1:29]], use = "p")
  modOXP = corPvalueStudent(modOXCor, nSamples)
} else {
  modOXCorP = bicorAndPvalue(MEs_col, colnames(datTrait_ave[1:29]) , robustY=robustY)
  modOXCor = modOXCorP$bicor
  modOXP   = modOXCorP$p
}

textMatrix1 = paste(signif(modOXCor, 2), "\n(", signif(modOXP, 1), ")", sep = "")
dim(textMatrix1) = dim(modOXCor)

modOXCor_results <- as.data.frame(modOXCor)
modOXP_results <- as.data.frame(modOXP)

min(modOXCor_results)

## [1] -0.7315044

max(modOXCor_results)

## [1] 0.7610047

write.csv(modOXCor_results,
          "/Users/leon/Documents/Project/Sorghum/2_Oxdative_vsd100_Correlation.csv", quote = F)
modOXCor_results

write.csv(modOXP_results,
          "/Users/leon/Documents/Project/Sorghum/2_Oxdative_vsd100_Pvalue.csv", quote = F)
modOXP_results

### Plot the correlation heatmap
labeledHeatmap(Matrix = modOXCor, xLabels = colnames(datTrait_ave[1:29]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix1, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module - Oxdative-Stress data relationships"))

STEP 7.3 Build correlation between LI6800 data and expression module (morning data/afternoon data)

###Build correlation between LI6800 data and expression module (morning)
if (corType=="pearson") {
  modLI1Cor = cor(MEs, LI1_ave[,colnames(LI1_ave)[1:12]], use = "p")
  modLI1P = corPvalueStudent(modLI1Cor, nSamples)
} else {
  modLI1CorP = bicorAndPvalue(MEs_col, colnames(LI1_ave[1:12]) , robustY=robustY)
  modLI1Cor = modLI1CorP$bicor
  modLI1P   = modLI1CorP$p
}

textMatrix2 = paste(signif(modLI1Cor, 2), "\n(", signif(modLI1P, 1), ")", sep = "")
dim(textMatrix2) = dim(modLI1Cor)

modLI1Cor_results <- as.data.frame(modLI1Cor)
modLI1P_results <- as.data.frame(modLI1P)
min(modLI1Cor_results)

## [1] -0.7147423

max(modLI1Cor_results)

## [1] 0.7285545

write.csv(modLI1Cor_results, "/Users/leon/Documents/Project/Sorghum/2_LIM_vsd100_correlation.csv", quote = F)
write.csv(modLI1P_results, "/Users/leon/Documents/Project/Sorghum/2_LIM_vsd100_Pvalue.csv", quote = F)

modLI1Cor_results

modLI1P_results

### Plot the correlation heatmap
labeledHeatmap(Matrix = modLI1Cor, xLabels = colnames(LI1_ave[1:12]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix2, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module-LI6800 data relationships (Morning)"))

###Build correlation between LI6800 data and expression module (afternoon)
if (corType=="pearson") {
  modLI2Cor = cor(MEs, LI2_ave[,colnames(LI2_ave)[1:12]], use = "p")
  modLI2P = corPvalueStudent(modLI2Cor, nSamples)
} else {
  modLI2CorP = bicorAndPvalue(MEs_col, colnames(LI2_ave[1:12]) , robustY=robustY)
  modLI2Cor = modLI2CorP$bicor
  modLI2P  = modLI2CorP$p
}

textMatrix3 = paste(signif(modLI2Cor, 2), "\n(", signif(modLI2P, 1), ")", sep = "")
dim(textMatrix3) = dim(modLI2Cor)

modLI2Cor_results <- as.data.frame(modLI2Cor)
modLI2P_results <- as.data.frame(modLI2P)
min(modLI2Cor_results)

## [1] -0.710625

max(modLI2Cor_results)

## [1] 0.6827603

write.csv(modLI2Cor_results, "/Users/leon/Documents/Project/Sorghum/2_LIA_vsd100_correlation.csv", quote = F)
write.csv(modLI2P_results, "/Users/leon/Documents/Project/Sorghum/2_LIA_vsd100_Pvalue.csv", quote = F)

modLI2Cor_results

modLI2P_results

### Plot the correlation heatmap
labeledHeatmap(Matrix =  modLI2Cor, xLabels = colnames(LI2_ave[1:12]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix3, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module-LI6800 data relationships (Afternoon)"))

###Build correlation between metabolites and gene expression
if (corType=="pearson") {
  modMetCor = cor(MEs, Metabolites_ave[,colnames(Metabolites_ave)[1:43]], use = "p")
  modMetP = corPvalueStudent(modMetCor, nSamples)
} else {
  modMetCorP = bicorAndPvalue(MEs_col, colnames(Metabolites_ave[1:43]) , robustY=robustY)
  modMetCor = modMetCorP$bicor
  modMetP  = modMetCorP$p
}

textMatrix4 = paste(signif(modMetCor, 2), "\n(", signif(modMetP, 1), ")", sep = "")
dim(textMatrix4) = dim(modMetCor)

modMetCor_results <- as.data.frame(modMetCor)
modMetP_results <- as.data.frame(modMetP)
min(modMetCor_results)

## [1] -0.6367811

max(modMetCor_results)

## [1] 0.7962088

write.csv(modMetCor_results, "/Users/leon/Documents/Project/Sorghum/2_Metabolites_vsd100_correlation.csv", quote = F)
write.csv(modMetP_results, "/Users/leon/Documents/Project/Sorghum/2_Metabolites_vsd100_Pvalue.csv", quote = F)

modMetCor_results

modMetP_results

### Plot the correlation heatmap
labeledHeatmap(Matrix =  modMetCor, xLabels = colnames(Metabolites_ave[1:43]), 
               yLabels = colnames(MEs), 
               cex.lab = 0.5, 
               ySymbols = colnames(MEs), colorLabels = TRUE, 
               colors = blueWhiteRed(50), 
               textMatrix = textMatrix4, setStdMargins = TRUE, 
               cex.text = 0.25, zlim = c(-1,1),
               main = paste("Module-metabolites data correlations (Afternoon)"))

Calculate the gene-module membership (MM)

# names (colors) of the modules
modNames <- substring(names(MEs), 3)
geneModuleMembership <- as.data.frame(cor(datExpr, MEs, use = "p"))

MMPvalue <- as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples))

names(geneModuleMembership) = paste("MM", modNames, sep="")
names(MMPvalue) = paste("p.MM", modNames, sep="")

#Export the gene-module membership (MM table includes all modules)
write.csv(geneModuleMembership, "/Users/leon/Documents/Project/Sorghum/CorrelationGene_module_membership_vsd100.csv")
geneModuleMembership

GS_MM based on particular trait

### extract the GS of oxidative stress phenotype
Oxidative_list <- colnames(datTrait_ave)
Oxidative_GS <- data.frame(matrix(ncol = length(Oxidative_list) , nrow = ncol(datExpr)))

for (i in 1:length(Oxidative_list)){
  metab <- datTrait_ave[,colnames(datTrait_ave) == Oxidative_list[i]]
  Oxidative_GS[,i] <- as.data.frame(cor(datExpr, metab, use = "p"))
  colnames(Oxidative_GS)[i] <- Oxidative_list[i]
  rownames(Oxidative_GS) <- colnames(datExpr)
}

### export phenotype data
write.csv(Oxidative_GS, "/Users/leon/Documents/Project/Sorghum/Oxidative_GS.csv", quote = F)
head(Oxidative_GS, 50)

### extract the GS of LIC6800 phenotype
LIM_list <- colnames(LI1_ave)
LIM_GS <- data.frame(matrix(ncol = length(LIM_list) , nrow = ncol(datExpr)))

for (i in 1:length(LIM_list)){
  metab <- LI1_ave[,colnames(LI1_ave) == LIM_list[i]]
  LIM_GS[,i] <- as.data.frame(cor(datExpr, metab, use = "p"))
  colnames(LIM_GS)[i] <- LIM_list[i]
  rownames(LIM_GS) <- colnames(datExpr)
}

### export phenotype data
write.csv(LIM_GS, "/Users/leon/Documents/Project/Sorghum/LIM_GS.csv", quote = F)
head(LIM_GS, 50)

### extract the GS of LIC6800 phenotype
Metabolites_list <- colnames(Metabolites_ave)
Metabolites_GS <- data.frame(matrix(ncol = length(Metabolites_list) , nrow = ncol(datExpr)))

for (i in 1:length(Metabolites_list)){
  metab <- Metabolites_ave[,colnames(Metabolites_ave) == Metabolites_list[i]]
  Metabolites_GS[,i] <- as.data.frame(cor(datExpr, metab, use = "p"))
  colnames(Metabolites_GS)[i] <- Metabolites_list[i]
  rownames(Metabolites_GS) <- colnames(datExpr)
}

### export phenotype data
write.csv(Metabolites_GS, "/Users/leon/Documents/Project/Sorghum/Metabolites_GS.csv", quote = F)
head(Metabolites_GS, 50)

Export network for cytoscape downstream visulization

for (i in 1:nrow(Module_count)){
  # Select module
  module <- Module_count[i,1]
  # Select module probes
  probes = colnames(datExpr)
  moduleColors = mergedColors
  inModule = (moduleColors==module);
  modProbes = probes[inModule];
  # Select the corresponding Topological Overlap
  modTOM <- TOM[inModule, inModule]
  dimnames(modTOM) <- list(modProbes, modProbes)
  
  cyt <- exportNetworkToCytoscape(
  modTOM,
  edgeFile = 
  paste0("/Users/leon/Documents/Project/Sorghum/Edge/edges-vsd100", paste(module), ".csv"),
  nodeFile = 
  paste0("/Users/leon/Documents/Project/Sorghum/Node/nodes-vsd100", paste(module), ".csv"),
  weighted = TRUE,
  threshold = 0,
  nodeNames = modProbes, 
  nodeAttr = moduleColors[inModule])
}

Filter weight of network using cutoff

sample.list <- read.table("/Users/leon/Documents/Project/Sorghum/sample.list", header = F)
sample.list

for (i in 1:nrow(sample.list)){
  sample.df <- read.csv(paste0("/Users/leon/Documents/Project/Sorghum/Edge/",
                               sample.list[i,1]), header = T, sep = "\t")
  ### sort weight data from largest to smallest
  Edge_sort <- sample.df[order(-sample.df$weight),]
  
  ### Take the 10% top edge connections
  Edge_clean <- unique(Edge_sort[1: nrow(Edge_sort)*0.1,])
  write.csv(Edge_clean, paste0("/Users/leon/Documents/Project/Sorghum/Clean_Edge/",sample.list[i,1]), row.names = F, quote = F)
  
  Node1 <- Edge_clean[,1, drop = F]
  Node2 <- Edge_clean[,2, drop = F]
  colnames(Node1) = "Transcripts"
  colnames(Node2) = "Transcripts"
  
  ### Build non-duplciated transcripts list 
  temp <- unique(bind_rows(Node1, Node2))
  Edge_count <- data.frame(table(bind_rows(Node1, Node2)$Transcripts))
  
  Node_clean <- data.frame(Node =  temp$Transcripts, Module = sample.list[i,1])
  Node_clean$Module <- str_replace(Node_clean$Module, "edges-vsd100", "") 
  Node_clean$Module <- str_replace(Node_clean$Module, ".csv", "")
  
  write.csv(Node_clean, paste0("/Users/leon/Documents/Project/Sorghum/Clean_node/",sample.list[i,1]), row.names = F, quote = F)
  write.csv(Edge_count, paste0("/Users/leon/Documents/Project/Sorghum/Edge_count/",sample.list[i,1]), row.names = F, quote = F)
  
}

### example files
head(Node_clean)

head(Edge_count)

head(Edge_clean)

Filter module membership for each gene (MM)

MM_score <- read.csv("/Users/leon/Documents/Project/Sorghum/CorrelationGene_module_membership_vsd100.csv", header= T)
colnames(MM_score)[1] <- "id"
colnames(MM_score) <- str_replace(colnames(MM_score), "MM", "")
Module_color <- colnames(MM_score[,-1])

### Load ids which associated with the module color
TranscriptsID <- read.table("/Users/leon/Documents/Project/Sorghum/Transcripts.matchinglist.txt", header = T)
head(TranscriptsID, 20)

#Extracting certain module
for (i in 1:length(Module_color)){
  ID_temp <- TranscriptsID[TranscriptsID$module == Module_color[i], ]
  MM_temp <- MM_score[,c("id", Module_color[i])]
  MM_summary <- left_join(ID_temp,MM_temp, by = "id")
  colnames(MM_summary)[3] <- "MM_score"
  
  assign(paste0(Module_color[i], "_MM_summary"),MM_summary)
}

MM_list <- lapply(ls(pattern = "_MM_summary"), get)
MM_combine <- bind_rows(MM_list)

head(MM_combine, 20)

write.csv(MM_combine, "/Users/leon/Documents/Project/Sorghum/3_MM_vsd100_clean.csv")

Sorghum WGCNA and module-trait correlated genes

Liang Yu, Andrew Nelson

2022/8/15

Co-expression network construction (WGCNA)

Prior installation and configuration step

Load libraries

Load envrioment

STEP 1.1 Load expression data and meta-table

STEP 1.2 Calculate the average numbers for each two replicates

STEP 1.3 Load Phenotype information

STEP 1. Load genes with high MAD score

STEP 2. Calculate the soft thresholding power (beta)

STEP3.1. Step-by-step network construction and module detection

STEP 3.2 Merging of modules whose expression profiles are very similar

STEP 4. Plot clustereing among samples

STEP 6 Compare the epigengene values across samples from each module

STEP 7. Module-trait membership correlation

STEP 7.1

STEP 7.2 Build correlation between oxdative status data and expression module

STEP 7.3 Build correlation between LI6800 data and expression module (morning data/afternoon data)

Calculate the gene-module membership (MM)

GS_MM based on particular trait

Export network for cytoscape downstream visulization

Filter weight of network using cutoff

Filter module membership for each gene (MM)