Read in expression data

setwd("C:/Users/Fei_lab/Documents/BTI2023/Reruns")
library(WGCNA)
library(tidyr)
library(ggplot2)
library(cowplot)
library(dplyr)
library(stringr)
library(rmarkdown)
options(strigsAsFactors = FALSE)
datExpressed = read.csv("VDMclean.csv")

Translate frame so that rownames are sample names and column headers are genes

datExprFrameRaw = as.data.frame(t(datExpressed[, -c(1)]))
names(datExprFrameRaw) = datExpressed$X
rownames(datExprFrameRaw) = names(datExpressed[-c(1)])


n <- nrow(datExprFrameRaw)
datExprFrame <- aggregate(datExprFrameRaw, by = list(group = rep(1:(n/3), each = 3)), FUN = mean)

rownames(datExprFrame) <- c("Jelly_Br_M82","Jelly_MG_M82","Jelly_Ripe_M82","Pericarp_21dpa_M82","Pericarp_Br_M82","Pericarp_MG_M82","Pericarp_Ripe_M82","Placenta_21dpa_M82","Placenta_Br_M82","Placenta_MG_M82","Placenta_Ripe_M82","Jelly_Br_Pennellii","Jelly_MG_Pennellii","Jelly_Ripe_Pennellii","Pericarp_21dpa_Pennellii","Pericarp_Br_Pennellii","Pericarp_MG_Pennellii","Pericarp_Ripe_Pennellii","Placenta_21dpa_Pennellii","Placenta_Br_Pennellii","Placenta_MG_Pennellii","Placenta_Ripe_Pennellii")
datExprFrame <- datExprFrame[-1]
rmarkdown::paged_table(datExprFrame)

Check for genes and samples with too many missing values

gag = goodSamplesGenes(datExprFrame, verbose = 3)

##  Flagging genes and samples with too many missing values...
##   ..step 1

gag$allOK

## [1] TRUE

# Should return true

Cluster the samples using a a tree

sampleTree = hclust(dist(datExprFrame), method = "average")
# Plot sample tree 
sizeGrWindow(12, 9)
par(cex = 0.6)
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub = "", xlab = "", cex.lab = 1.5, cex.axis = 1.5, cex.main = 2)
# abline(h = 15, col = "red")
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)

## clust
##  0 
## 22

keepSamples = (clust == 1)
datExpr = datExprFrame[keepSamples, ]
nGenes = ncol(datExprFrame)
nSamples = nrow(datExprFrame)

Pick soft thresholding power

powers = c(c(1:10), seq(from = 12, to = 20, by = 2))

sft = pickSoftThreshold(datExprFrame, powerVector = powers, verbose = 5)

## pickSoftThreshold: will use block size 2348.
##  pickSoftThreshold: calculating connectivity for given powers...
##    ..working on genes 1 through 2348 of 2348

## Warning: executing %dopar% sequentially: no parallel backend registered

##    Power SFT.R.sq  slope truncated.R.sq mean.k. median.k. max.k.
## 1      1   0.9310  3.770          0.969  1320.0    1350.0   1730
## 2      2   0.7820  1.700          0.993   858.0     869.0   1350
## 3      3   0.5220  0.776          0.977   599.0     594.0   1090
## 4      4   0.0779  0.183          0.951   438.0     420.0    899
## 5      5   0.0712 -0.159          0.928   332.0     303.0    760
## 6      6   0.3840 -0.411          0.944   257.0     224.0    653
## 7      7   0.6150 -0.609          0.953   204.0     170.0    569
## 8      8   0.7490 -0.754          0.955   165.0     131.0    501
## 9      9   0.8400 -0.868          0.985   135.0     101.0    445
## 10    10   0.8760 -0.951          0.989   112.0      79.0    399
## 11    12   0.9250 -1.080          0.988    79.4      50.2    327
## 12    14   0.9450 -1.170          0.986    58.4      32.9    274
## 13    16   0.9550 -1.230          0.984    44.2      22.2    233
## 14    18   0.9630 -1.250          0.984    34.3      15.6    201
## 15    20   0.9610 -1.270          0.975    27.1      11.1    175

k <- softConnectivity(datE=datExprFrame,power=sft$powerEstimate)

##  softConnectivity: FYI: connecitivty of genes with less than 8 valid samples will be returned as NA.
##  ..calculating connectivities..

sizeGrWindow(9, 5)
par(mfrow = c(1, 2))
cex1 = 0.9

plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab = "Soft Threshold (power)", ylab = "Scale Free Topology Model Fit, signed R^2", type = "n", main = paste("ScaleIndependence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers, cex=cex1, col="red")
abline(h = 0.9, col = "red")

plot(sft$fitIndices[,1], sft$fitIndices[,5],
  xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
  main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")

scaleFreePlot(k, main = " Check Scale fee topology\n")

##   scaleFreeRsquared slope
## 1              0.92  3.77

net = blockwiseModules(datExprFrame, power = 16,
                      TOMType = "unsigned", minModuleSize = 10,
                      reassignThreshold = 0, mergeCutHeight = 0.25,
                      numericLabels = TRUE, pamRespectsDendro = FALSE,
                      saveTOMs = TRUE,
                      saveTOMFileBase = "tomatoGeneTOM",
                      verbose = 3)

##  Calculating module eigengenes block-wise from all genes
##    Flagging genes and samples with too many missing values...
##     ..step 1
##  ..Working on block 1 .
##     TOM calculation: adjacency..
##     ..will not use multithreading.
##      Fraction of slow calculations: 0.000000
##     ..connectivity..
##     ..matrix multiplication (system BLAS)..
##     ..normalization..
##     ..done.
##    ..saving TOM for block 1 into file tomatoGeneTOM-block.1.RData
##  ....clustering..
##  ....detecting modules..
##  ....calculating module eigengenes..
##  ....checking kME in modules..
##  ..merging modules that are too close..
##      mergeCloseModules: Merging modules whose distance is less than 0.25
##        Calculating new MEs...

Plot cluster dendogram

sizeGrWindow(12, 9)
mergedColors = labels2colors(net$colors)
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
  "Module colors",
  dendroLabels = FALSE, hang = 0.03,
  addGuide = TRUE, guideHang = 0.05)

moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];

Merge modules with similar expression profiles and plot new denodrogram

MEList = moduleEigengenes(datExprFrame, colors = mergedColors)
MEs = MEList$eigengenes
MEDiss = 1-cor(MEs);
METree = hclust(as.dist(MEDiss), method = "average");

MEDissThres = 0.25
merge = mergeCloseModules(datExprFrame, mergedColors, cutHeight = MEDissThres, verbose = 3)

##  mergeCloseModules: Merging modules whose distance is less than 0.25
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 9 module eigengenes in given set.
##    Calculating new MEs...
##    multiSetMEs: Calculating module MEs.
##      Working on set 1 ...
##      moduleEigengenes: Calculating 9 module eigengenes in given set.

plotDendroAndColors(geneTree, mergedColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")

Module_count <- as.data.frame(table(mergedColors))

Compare values across samples from each module

#Build the epignegene value matrix for plot
MEs0 <- moduleEigengenes(datExprFrame, mergedColors)$eigengenes
MEs0$Sample <- rownames(MEs0)
MEs0$Name <- rownames(MEs0)
MEs <- separate(MEs0, Name, c("Location", "Stage", "Parent"))

MEs$Stage <- factor(MEs$Stage, levels = c("21dpa", "MG", "Br", "Ripe"))
# Plot Eigengene value for each module in a line graph mode
for (i in 1:nrow(Module_count)){
  plot1 <- ggplot(MEs,aes(x = Stage,y = get(paste0("ME", Module_count[i,1])), color = Parent)) +
  geom_point(size = 1) +
  stat_summary(fun = "mean", geom = "line", aes(group = Parent), size = 1, alpha = 0.3) +
  theme_bw(base_size = 10) +
  theme(axis.text.x=element_text(angle=90)) + ylab(paste0("M82 Eigengene value of ", Module_count[i,1]))
  print(plot1)
}

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Network heatmap plot of all genes

dissTOM = 1-TOMsimilarityFromExpr(datExprFrame, power = 16);

## TOM calculation: adjacency..
## ..will not use multithreading.
##  Fraction of slow calculations: 0.000000
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.

plotTOM = dissTOM^7;
diag(plotTOM) = NA;
sizeGrWindow(13,13)
TOMplot(plotTOM, geneTree, moduleColors, main = "Network heatmap plot, all genes")

Module_count <- as.data.frame(table(mergedColors))
rmarkdown::paged_table(Module_count)

write.csv(names(datExprFrame)[moduleColors=="turquoise"], file = "genes.csv", row.names = FALSE)

VDMWGCNAAnalysis

Mukund

2023-07-17

Read in expression data

Translate frame so that rownames are sample names and column headers are genes

Check for genes and samples with too many missing values

Cluster the samples using a a tree

Pick soft thresholding power

Plot cluster dendogram

Merge modules with similar expression profiles and plot new denodrogram

Compare values across samples from each module

Network heatmap plot of all genes