Read in expression data

setwd("C:/Users/Fei_lab/Documents/BTI2023/Reruns")
library(WGCNA)
library(tidyr)
library(ggplot2)
library(cowplot)
library(dplyr)
library(stringr)
library(rmarkdown)
options(strigsAsFactors = FALSE)
datExpressed = read.csv("VDMclean.csv")

Translate frame so that rownames are sample names and column headers are genes

datExprFrameRaw = as.data.frame(t(datExpressed[, -c(1)]))
names(datExprFrameRaw) = datExpressed$X
rownames(datExprFrameRaw) = names(datExpressed[-c(1)])


n <- nrow(datExprFrameRaw)
datExprFrame <- aggregate(datExprFrameRaw, by = list(group = rep(1:(n/3), each = 3)), FUN = mean)

rownames(datExprFrame) <- c("Jelly_Br_M82","Jelly_MG_M82","Jelly_Ripe_M82","Pericarp_21dpa_M82","Pericarp_Br_M82","Pericarp_MG_M82","Pericarp_Ripe_M82","Placenta_21dpa_M82","Placenta_Br_M82","Placenta_MG_M82","Placenta_Ripe_M82","Jelly_Br_Pennellii","Jelly_MG_Pennellii","Jelly_Ripe_Pennellii","Pericarp_21dpa_Pennellii","Pericarp_Br_Pennellii","Pericarp_MG_Pennellii","Pericarp_Ripe_Pennellii","Placenta_21dpa_Pennellii","Placenta_Br_Pennellii","Placenta_MG_Pennellii","Placenta_Ripe_Pennellii")
datExprFrame <- datExprFrame[-1]
rmarkdown::paged_table(datExprFrame)

Check for genes and samples with too many missing values

gag = goodSamplesGenes(datExprFrame, verbose = 3)
##  Flagging genes and samples with too many missing values...
##   ..step 1
gag$allOK
## [1] TRUE
# Should return true

Cluster the samples using a a tree

sampleTree = hclust(dist(datExprFrame), method = "average")
# Plot sample tree 
sizeGrWindow(12, 9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub = "", xlab = "", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
# abline(h = 15, col = "red")
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
## clust
##  0 
## 22
keepSamples = (clust == 1)
datExpr = datExprFrame[keepSamples, ]
nGenes = ncol(datExprFrame)
nSamples = nrow(datExprFrame)

Pick soft thresholding power

powers = c(c(1:10), seq(from = 12, to = 20, by = 2))

sft = pickSoftThreshold(datExprFrame, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 2348.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 2348 of 2348
## Warning: executing %dopar% sequentially: no parallel backend registered
##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.9310  3.770          0.969  1320.0    1350.0   1730
## 2      2   0.7820  1.700          0.993   858.0     869.0   1350
## 3      3   0.5220  0.776          0.977   599.0     594.0   1090
## 4      4   0.0779  0.183          0.951   438.0     420.0    899
## 5      5   0.0712 -0.159          0.928   332.0     303.0    760
## 6      6   0.3840 -0.411          0.944   257.0     224.0    653
## 7      7   0.6150 -0.609          0.953   204.0     170.0    569
## 8      8   0.7490 -0.754          0.955   165.0     131.0    501
## 9      9   0.8400 -0.868          0.985   135.0     101.0    445
## 10    10   0.8760 -0.951          0.989   112.0      79.0    399
## 11    12   0.9250 -1.080          0.988    79.4      50.2    327
## 12    14   0.9450 -1.170          0.986    58.4      32.9    274
## 13    16   0.9550 -1.230          0.984    44.2      22.2    233
## 14    18   0.9630 -1.250          0.984    34.3      15.6    201
## 15    20   0.9610 -1.270          0.975    27.1      11.1    175
k <- softConnectivity(datE=datExprFrame,power=sft$powerEstimate) 
##  softConnectivity: FYI: connecitivty of genes with less than 8 valid samples will be returned as NA.
##  ..calculating connectivities..
sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 = 0.9

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2", type = "n", main = paste("ScaleIndependence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers, cex=cex1, col="red")
abline(h = 0.9, col = "red")

plot(sft$fitIndices[,1], sft$fitIndices[,5],
  xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
  main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
scaleFreePlot(k, main = " Check Scale fee topology\n")

##   scaleFreeRsquared slope
## 1              0.92  3.77
net = blockwiseModules(datExprFrame, power = 16,
                      TOMType = "unsigned", minModuleSize = 10,
                      reassignThreshold = 0, mergeCutHeight = 0.25,
                      numericLabels = TRUE, pamRespectsDendro = FALSE,
                      saveTOMs = TRUE,
                      saveTOMFileBase = "tomatoGeneTOM",
                      verbose = 3)
##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will not use multithreading.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file tomatoGeneTOM-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...

Plot cluster dendogram

sizeGrWindow(12, 9)
mergedColors = labels2colors(net$colors)
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE, hang = 0.03,
  addGuide = TRUE, guideHang = 0.05)

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];

Merge modules with similar expression profiles and plot new denodrogram

MEList = moduleEigengenes(datExprFrame, colors = mergedColors)
MEs = MEList$eigengenes
MEDiss = 1-cor(MEs);
METree = hclust(as.dist(MEDiss), method = "average");

MEDissThres = 0.25
merge = mergeCloseModules(datExprFrame, mergedColors, cutHeight = MEDissThres, verbose = 3)
##  mergeCloseModules: Merging modules whose distance is less than 0.25
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 9 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 9 module eigengenes in given set.
plotDendroAndColors(geneTree, mergedColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")

Module_count <- as.data.frame(table(mergedColors))

Compare values across samples from each module

#Build the epignegene value matrix for plot
MEs0 <- moduleEigengenes(datExprFrame, mergedColors)$eigengenes
MEs0$Sample <- rownames(MEs0)
MEs0$Name <- rownames(MEs0)
MEs <- separate(MEs0, Name, c("Location", "Stage", "Parent"))

MEs$Stage <- factor(MEs$Stage, levels = c("21dpa", "MG", "Br", "Ripe"))
# Plot Eigengene value for each module in a line graph mode
for (i in 1:nrow(Module_count)){
  plot1 <- ggplot(MEs,aes(x = Stage,y = get(paste0("ME", Module_count[i,1])), color = Parent)) +
  geom_point(size = 1) +
  stat_summary(fun = "mean", geom = "line", aes(group = Parent), size = 1, alpha = 0.3) +
  theme_bw(base_size = 10) +
  theme(axis.text.x=element_text(angle=90)) + ylab(paste0("M82 Eigengene value of ", Module_count[i,1]))
  print(plot1)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Network heatmap plot of all genes

dissTOM = 1-TOMsimilarityFromExpr(datExprFrame, power = 16);
## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
plotTOM = dissTOM^7;
diag(plotTOM) = NA;
sizeGrWindow(13,13)
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
Module_count <- as.data.frame(table(mergedColors))
rmarkdown::paged_table(Module_count)
write.csv(names(datExprFrame)[moduleColors=="turquoise"], file = "genes.csv", row.names = FALSE)