Read in expression data
setwd("C:/Users/Fei_lab/Documents/BTI2023/Reruns")
library(WGCNA)
library(tidyr)
library(ggplot2)
library(cowplot)
library(dplyr)
library(stringr)
library(rmarkdown)
options(strigsAsFactors = FALSE)
datExpressed = read.csv("VDMclean.csv")
Translate frame so that rownames are sample names and column headers
are genes
datExprFrameRaw = as.data.frame(t(datExpressed[, -c(1)]))
names(datExprFrameRaw) = datExpressed$X
rownames(datExprFrameRaw) = names(datExpressed[-c(1)])
n <- nrow(datExprFrameRaw)
datExprFrame <- aggregate(datExprFrameRaw, by = list(group = rep(1:(n/3), each = 3)), FUN = mean)
rownames(datExprFrame) <- c("Jelly_Br_M82","Jelly_MG_M82","Jelly_Ripe_M82","Pericarp_21dpa_M82","Pericarp_Br_M82","Pericarp_MG_M82","Pericarp_Ripe_M82","Placenta_21dpa_M82","Placenta_Br_M82","Placenta_MG_M82","Placenta_Ripe_M82","Jelly_Br_Pennellii","Jelly_MG_Pennellii","Jelly_Ripe_Pennellii","Pericarp_21dpa_Pennellii","Pericarp_Br_Pennellii","Pericarp_MG_Pennellii","Pericarp_Ripe_Pennellii","Placenta_21dpa_Pennellii","Placenta_Br_Pennellii","Placenta_MG_Pennellii","Placenta_Ripe_Pennellii")
datExprFrame <- datExprFrame[-1]
rmarkdown::paged_table(datExprFrame)
Check for genes and samples with too many missing values
gag = goodSamplesGenes(datExprFrame, verbose = 3)
## Flagging genes and samples with too many missing values...
## ..step 1
gag$allOK
## [1] TRUE
# Should return true
Cluster the samples using a a tree
sampleTree = hclust(dist(datExprFrame), method = "average")
# Plot sample tree
sizeGrWindow(12, 9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub = "", xlab = "", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
# abline(h = 15, col = "red")
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
## clust
## 0
## 22
keepSamples = (clust == 1)
datExpr = datExprFrame[keepSamples, ]
nGenes = ncol(datExprFrame)
nSamples = nrow(datExprFrame)
Pick soft thresholding power
powers = c(c(1:10), seq(from = 12, to = 20, by = 2))
sft = pickSoftThreshold(datExprFrame, powerVector = powers, verbose = 5)
## pickSoftThreshold: will use block size 2348.
## pickSoftThreshold: calculating connectivity for given powers...
## ..working on genes 1 through 2348 of 2348
## Warning: executing %dopar% sequentially: no parallel backend registered
## Power SFT.R.sq slope truncated.R.sq mean.k. median.k. max.k.
## 1 1 0.9310 3.770 0.969 1320.0 1350.0 1730
## 2 2 0.7820 1.700 0.993 858.0 869.0 1350
## 3 3 0.5220 0.776 0.977 599.0 594.0 1090
## 4 4 0.0779 0.183 0.951 438.0 420.0 899
## 5 5 0.0712 -0.159 0.928 332.0 303.0 760
## 6 6 0.3840 -0.411 0.944 257.0 224.0 653
## 7 7 0.6150 -0.609 0.953 204.0 170.0 569
## 8 8 0.7490 -0.754 0.955 165.0 131.0 501
## 9 9 0.8400 -0.868 0.985 135.0 101.0 445
## 10 10 0.8760 -0.951 0.989 112.0 79.0 399
## 11 12 0.9250 -1.080 0.988 79.4 50.2 327
## 12 14 0.9450 -1.170 0.986 58.4 32.9 274
## 13 16 0.9550 -1.230 0.984 44.2 22.2 233
## 14 18 0.9630 -1.250 0.984 34.3 15.6 201
## 15 20 0.9610 -1.270 0.975 27.1 11.1 175
k <- softConnectivity(datE=datExprFrame,power=sft$powerEstimate)
## softConnectivity: FYI: connecitivty of genes with less than 8 valid samples will be returned as NA.
## ..calculating connectivities..
sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 = 0.9
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2", type = "n", main = paste("ScaleIndependence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers, cex=cex1, col="red")
abline(h = 0.9, col = "red")
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
scaleFreePlot(k, main = " Check Scale fee topology\n")

## scaleFreeRsquared slope
## 1 0.92 3.77
net = blockwiseModules(datExprFrame, power = 16,
TOMType = "unsigned", minModuleSize = 10,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "tomatoGeneTOM",
verbose = 3)
## Calculating module eigengenes block-wise from all genes
## Flagging genes and samples with too many missing values...
## ..step 1
## ..Working on block 1 .
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
## ..saving TOM for block 1 into file tomatoGeneTOM-block.1.RData
## ....clustering..
## ....detecting modules..
## ....calculating module eigengenes..
## ....checking kME in modules..
## ..merging modules that are too close..
## mergeCloseModules: Merging modules whose distance is less than 0.25
## Calculating new MEs...
Plot cluster dendogram
sizeGrWindow(12, 9)
mergedColors = labels2colors(net$colors)
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
Merge modules with similar expression profiles and plot new
denodrogram
MEList = moduleEigengenes(datExprFrame, colors = mergedColors)
MEs = MEList$eigengenes
MEDiss = 1-cor(MEs);
METree = hclust(as.dist(MEDiss), method = "average");
MEDissThres = 0.25
merge = mergeCloseModules(datExprFrame, mergedColors, cutHeight = MEDissThres, verbose = 3)
## mergeCloseModules: Merging modules whose distance is less than 0.25
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 9 module eigengenes in given set.
## Calculating new MEs...
## multiSetMEs: Calculating module MEs.
## Working on set 1 ...
## moduleEigengenes: Calculating 9 module eigengenes in given set.
plotDendroAndColors(geneTree, mergedColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")

Module_count <- as.data.frame(table(mergedColors))
Compare values across samples from each module
#Build the epignegene value matrix for plot
MEs0 <- moduleEigengenes(datExprFrame, mergedColors)$eigengenes
MEs0$Sample <- rownames(MEs0)
MEs0$Name <- rownames(MEs0)
MEs <- separate(MEs0, Name, c("Location", "Stage", "Parent"))
MEs$Stage <- factor(MEs$Stage, levels = c("21dpa", "MG", "Br", "Ripe"))
# Plot Eigengene value for each module in a line graph mode
for (i in 1:nrow(Module_count)){
plot1 <- ggplot(MEs,aes(x = Stage,y = get(paste0("ME", Module_count[i,1])), color = Parent)) +
geom_point(size = 1) +
stat_summary(fun = "mean", geom = "line", aes(group = Parent), size = 1, alpha = 0.3) +
theme_bw(base_size = 10) +
theme(axis.text.x=element_text(angle=90)) + ylab(paste0("M82 Eigengene value of ", Module_count[i,1]))
print(plot1)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.









Network heatmap plot of all genes
dissTOM = 1-TOMsimilarityFromExpr(datExprFrame, power = 16);
## TOM calculation: adjacency..
## ..will not use multithreading.
## Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
plotTOM = dissTOM^7;
diag(plotTOM) = NA;
sizeGrWindow(13,13)
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")
Module_count <- as.data.frame(table(mergedColors))
rmarkdown::paged_table(Module_count)
write.csv(names(datExprFrame)[moduleColors=="turquoise"], file = "genes.csv", row.names = FALSE)