Read in expression data

setwd("C:/Users/Fei_lab/Documents/BTI2023/Reruns")
library(WGCNA)
library(tidyr)
library(ggplot2)
library(cowplot)
library(dplyr)
library(stringr)
options(strigsAsFactors = FALSE)
datExpressed = read.csv("expressedinallEXAMPLE.csv")

Translate frame so that rownames are sample names and column headers are genes for ease of manipulating data

datExprFrameRaw = as.data.frame(t(datExpressed[, -c(1)]))
names(datExprFrameRaw) = datExpressed$X
rownames(datExprFrameRaw) = names(datExpressed[-c(1)])
View(datExprFrameRaw)

n <- nrow(datExprFrameRaw)
datExprFrame <- aggregate(datExprFrameRaw, by = list(group = rep(1:(n/3), each = 3)), FUN = mean)

rownames(datExprFrame) <- c("Jelly_Br_M82","Jelly_MG_M82","Jelly_Ripe_M82","Pericarp_21dpa_M82","Pericarp_Br_M82","Pericarp_MG_M82","Pericarp_Ripe_M82","Placenta_21dpa_M82","Placenta_Br_M82","Placenta_MG_M82","Placenta_Ripe_M82","Jelly_Br_Pennellii","Jelly_MG_Pennellii","Jelly_Ripe_Pennellii","Pericarp_21dpa_Pennellii","Pericarp_Br_Pennellii","Pericarp_MG_Pennellii","Pericarp_Ripe_Pennellii","Placenta_21dpa_Pennellii","Placenta_Br_Pennellii","Placenta_MG_Pennellii","Placenta_Ripe_Pennellii")
datExprFrame <- datExprFrame[-1]
rmarkdown::paged_table(datExprFrame)

Check for genes and samples with too many missing values

gag = goodSamplesGenes(datExprFrame, verbose = 3)
##  Flagging genes and samples with too many missing values...
##   ..step 1
gag$allOK
## [1] TRUE
# Should return TRUE

Cluster the samples using a tree to detect outliers

sampleTree = hclust(dist(datExprFrame), method = "average")
# Plot sample tree 
sizeGrWindow(12, 9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub = "", xlab = "", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
# abline(h = 15, col = "red")

Pick soft thresholding power

powers = c(c(1:10), seq(from = 12, to = 20, by = 2))

sft = pickSoftThreshold(datExprFrame, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 2290.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 2290 of 2290
## Warning: executing %dopar% sequentially: no parallel backend registered
##    Power SFT.R.sq   slope truncated.R.sq mean.k. median.k. max.k.
## 1      1 0.616000  2.4500          0.972 1040.00   1070.00 1490.0
## 2      2 0.177000  0.6400          0.943  577.00    581.00 1030.0
## 3      3 0.000431  0.0242          0.970  352.00    344.00  748.0
## 4      4 0.119000 -0.3840          0.980  230.00    218.00  559.0
## 5      5 0.328000 -0.6790          0.984  157.00    144.00  428.0
## 6      6 0.482000 -0.8800          0.984  112.00     98.70  336.0
## 7      7 0.578000 -1.0300          0.981   82.50     69.90  269.0
## 8      8 0.665000 -1.1500          0.988   62.30     51.40  219.0
## 9      9 0.733000 -1.2600          0.987   48.10     38.70  180.0
## 10    10 0.770000 -1.3300          0.981   37.80     29.70  150.0
## 11    12 0.813000 -1.4300          0.981   24.50     18.60  108.0
## 12    14 0.848000 -1.5200          0.979   16.70     12.30   79.6
## 13    16 0.875000 -1.5600          0.986   11.90      8.50   60.4
## 14    18 0.892000 -1.6000          0.990    8.72      6.06   47.2
## 15    20 0.902000 -1.6400          0.991    6.57      4.50   37.5
k <- softConnectivity(datE=datExprFrame,power=sft$powerEstimate) 
##  softConnectivity: FYI: connecitivty of genes with less than 8 valid samples will be returned as NA.
##  ..calculating connectivities..
sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 = 0.9

# Plot scale free topology
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2", type = "n", main = paste("ScaleIndependence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers, cex=cex1, col="red")
abline(h = 0.9, col = "red")

# Plot mean connectivity
plot(sft$fitIndices[,1], sft$fitIndices[,5],
  xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
  main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

Check scale free topology

scaleFreePlot(k, main = " Check Scale fee topology\n")

##   scaleFreeRsquared slope
## 1              0.86 -1.56
# Should be around 0.9

One step network construction

net = blockwiseModules(datExprFrame, power = 16,
                      TOMType = "unsigned", minModuleSize = 10,
                      reassignThreshold = 0, mergeCutHeight = 0.25,
                      numericLabels = TRUE, pamRespectsDendro = FALSE,
                      saveTOMs = TRUE,
                      saveTOMFileBase = "tomatoGeneTOM",
                      verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will not use multithreading.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file tomatoGeneTOM-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...

Plot cluster dendogram

sizeGrWindow(12, 9)
mergedColors = labels2colors(net$colors)
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE, hang = 0.03,
  addGuide = TRUE, guideHang = 0.05)

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];

Merge modules with similar expression profiles and plot new denodrogram

# Merge modules with similar expression profiles
MEList = moduleEigengenes(datExprFrame, colors = mergedColors)
MEs = MEList$eigengenes
MEDiss = 1-cor(MEs);
METree = hclust(as.dist(MEDiss), method = "average");

MEDissThres = 0.25
merge = mergeCloseModules(datExprFrame, mergedColors, cutHeight = MEDissThres, verbose = 3)
##  mergeCloseModules: Merging modules whose distance is less than 0.25
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 14 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 14 module eigengenes in given set.
# Plot new dendrogram
plotDendroAndColors(geneTree, mergedColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")

Module_count <- as.data.frame(table(mergedColors))

Compare values across samples from each module

#Build the epignegene value matrix for plot
MEs0 <- moduleEigengenes(datExprFrame, mergedColors)$eigengenes
MEs0$Sample <- rownames(MEs0)
MEs0$Name <- rownames(MEs0)
MEs <- separate(MEs0, Name, c("Location", "Stage", "Parent"))

MEs$Stage <- factor(MEs$Stage, levels = c("21dpa", "MG", "Br", "Ripe"))
# Plot Eigengene value for each module in a line graph mode
for (i in 1:nrow(Module_count)){
  plot1 <- ggplot(MEs,aes(x = Stage,y = get(paste0("ME", Module_count[i,1])), color = Parent)) +
  geom_point(size = 1) +
  stat_summary(fun = "mean", geom = "line", aes(group = Parent), size = 1, alpha = 0.3) +
  theme_bw(base_size = 10) +
  theme(axis.text.x=element_text(angle=90)) + ylab(paste0("M82 Eigengene value of ", Module_count[i,1]))
  print(plot1)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Network heatmap plot, all genes

dissTOM = 1-TOMsimilarityFromExpr(datExprFrame, power = 16);
## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
plotTOM = dissTOM^7;
diag(plotTOM) = NA;
sizeGrWindow(13,13)
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

Extract counts of each module

Module_count <- as.data.frame(table(mergedColors))
rmarkdown::paged_table(Module_count)