#PART ONE Network analysis of liver expression data from female mice: finding modules related to body weight 
#1.NETWORK ANALYSIS OF LIVER EXPRESSION 
#Set Working Directory
getwd();
workingDir = "."; 
setwd("C://Users//amayr//Desktop//FemaleLiver-Data");
library(WGCNA);
options(stringsAsFactors = FALSE);
#Look at the female and male liver data
femData = read.csv("LiverFemale3600.csv")
dim(femData);
names(femData)
datExpr0 = as.data.frame(t(femData[,-c(1:8)]))
names(datExpr0) = femData$substanceBXH;
datExpr0 = as.data.frame(t(femData[, -c(1:8)]))
names(datExpr0) = femData$substanceBXH;
rownames(datExpr0) = names(femData)[-c(1:8)]
#Identify genes and samples with excessive numbers of missing samples
gsg = goodSamplesGenes(datExpr0, verbose = 3);
gsg$allOK
#If the above statement is true, now cluster the samples on their Euclidean distance
sampleTree = hclust(dist(datExpr0), method = "average");
#Illustrate both dendograms at the same time: plot both into a pdf file. Incorporate a base cut if necessary for any outliers
sizeGrWindow(12,9)
par(cex =(0.6));
par(mar = c(0,4,2,0))
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(echo = TRUE)
plot(sampleTree, main = "Sample clustering to detect outliers", sub="", xlab="", cex.lab = 1.5,cex.axis = 1.5, cex.main = 2)
abline(h = 15, col = "red")
clust = cutreeStatic(sampleTree, cutHeight = 15, minSize = 10)
table(clust)
keepSamples = (clust==1)
datExpr = datExpr0[keepSamples,]
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
#Read in the trait data and match the samples for which they were measured to the expression samples.
traitData = read.csv("ClinicalTraits.csv")
dim(traitData)
names(traitData)
allTraits = traitData[, -c(31, 16)]
allTraits = allTraits[, c(2, 11:36) ]
dim(allTraits)
names(allTraits)
femaleSamples = rownames(datExpr);
traitRows = match(femaleSamples, allTraits$Mice);
datTraits = allTraits[traitRows, -1];
rownames(datTraits) = allTraits[traitRows, 1];
collectGarbage();
sampleTree2 = hclust(dist(datExpr), method = "averag")
traitColors = numbers2colors(datTraits, signed = FALSE);
plotDendroAndColors(sampleTree2, traitColors,groupLabels = names(datTraits),main = "Sample dendrogram and trait heatmap")
plot(pressure)
save(datExpr, datTraits, file = "FemaleLiver-01-dataInput.RData")

Figure 1: Clustering dendrogram of samples based on their Euclidean distance


#2.A AUTOMATIC NETWORK CONSTRUCTION & MODULE DETECTION 
#2.A.1 Choosing the soft threshold power:analysis of network topology
#Set WD
getwd()
workingDir = "."; 
setwd("C://Users//amayr//Desktop//FemaleLiver-Data");
library(WGCNA);
options(stringsAsFactors = FALSE);
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold (datExpr0, powerVector = powers, verbose = 5)
sizeGrWindow(9,5)
par(mfrow= c(1,2));
cex1 = 0.9;
#Scale-free topology fit index as a function of soft-threshold power: Scale Indepence Plot
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab = "Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red");
abline(h=0.90, cool="red")
#Mean connectivity Plot
plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
#2.A.2 One-step network construction & module detection
#Construct the gene network
net = blockwiseModules (datExpr, power = 6, TOMType = "unsigned", minModuleSize = 30,reassignThreshold = 0, mergeCutHeight = 0.25,numericLabels = TRUE, pamRespectsDendro = FALSE,saveTOMs = TRUE, saveTOMFileBase = "femaleMouseTom", verbose = 3)
#Identify modules and their size
table(net$colors)
#Plot the dendogram with color assignment 
sizeGrWindow(12,9)
mergedColors = labels2colors(net$colors)
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]], "Module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs;
geneTree = net$dendrograms[[1]];
save(MEs, moduleLabels, moduleColors, geneTree, file = "FemaleLiver-02-networkConstruction-auto.RData")

Fig 2: Clustering dendrogram of genes, with dissimilarity based on topological overlap, together with assignedmodule colors.


#2.B STEP-BY-STEP GENE NETWORK CONSTRUCTION & MODULE IDENTIFICATION
#2.B.1 Choosing the soft threshold power:analysis of network topology
#Set WD
getwd()
workingDir = "."; 
setwd("C://Users//amayr//Desktop//FemaleLiver-Data");
library(WGCNA)
options(stringsAsFactors = FALSE);
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold (datExpr0,powerVector = powers, verbose = 5)
sizeGrWindow(9,5)
#Change the number (1,2) to (1,1) so data is clear/not crowded
par(mfrow= c(1,1));
cex1 = 0.9;
#Scale-free topology fit index as a function of soft-threshold power: Scale Indepence Plot
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab = "Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red");
abline(h=0.90, cool="red")
#Mean connectivity Plot
plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
#2.B.2 Co-expression similarity and adjacency
softPower = 6;
adjacency = adjacency(datExpr, power = softPower);
#2.B.3 Adjacenct to Topological Overlap Matrix (TOM)
#Done to minimize effects of noise and spurious associations
TOM = TOMsimilarity(adjacency);
dissTOM = 1-TOM
#2.B.4 Clustering using TOM: produces dendrogram of genes
geneTree = hclust(as.dist(dissTOM), method = "average");
#Plot the dendogram
sizeGrWindow(12,9) 
plot(geneTree, xlab="", sub="", main = "Gene clustering on TOM-based dissimilarity", labels = FALSE, hang = 0.04);
#Set module size relatively high
minModuleSize = 30;
# Module identification using dynamic tree cut:
dynamicMods = cutreeDynamic(dendro = geneTree, distM = dissTOM,deepSplit = 2, pamRespectsDendro = FALSE, minClusterSize = minModuleSize);
table(dynamicMods)
#Plot the module assignment under the gene dendrogram
#How to convert numeric tables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicMods)
#How to plot the dendrogram and colors underneath
sizeGrWindow(8,6)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",dendroLabels = FALSE, hang = 0.03,addGuide = TRUE, guideHang = 0.05,main = "Gene dendrogram and module colors")
#2.B.5 Merge modules whose expression profiles are very similar
#Calculate Eigengenes to quantify co-expression similarity of entire modules
MEList = moduleEigengenes (datExpr, colors = dynamicColors)
MEs = MEList$eigengenes
#Calculate dissimilarity
MEDiss = 1-cor(MEs);
#Cluster module eigengnes and Plot thhe results
METree = hclust(as.dist(MEDiss), method = "average");
sizeGrWindow(7,6)
plot(METree, main = "Clustering of module eigengenes",xlab = "", sub = "")
#Create Height Cut
MEDissThres = 0.25
abline(h=MEDissThres, col = "red")
merge = mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
mergedColors = merge$colors
mergedMEs = merge$newMEs;
#Plot Dendrogram again to see what the merge did to the module colors
sizeGrWindow(12,9)
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors), c("Dynamic Tree Cut", "Merged dynamic"), dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
#Save relevant colors for subsequent analysis
moduleColors = mergedColors
#How to construct numerical labels corresponding to colors
colorOrder = c("grey", standardColors(50));
moduleLabels = match(moduleColors, colorOrder)-1;
MEs = mergedMEs;
save(MEs, moduleLabels, moduleColors, geneTree, file = "FemaleLiver-02-networkConstruction-stepByStep.RData")

Figure 3: Clustering dendrogram of genes, with dissimilarity based on topological overlap, together with assignedmerged module colors and the original module colors.


#2.C DEALILNG WITH LARGE DATA SETS: BLOCK-WISE NETWORK CONSTRUCTION & MODULE DETECTION
getwd()
workingDir = "."; 
setwd("C://Users//amayr//Desktop//FemaleLiver-Data");
library(WGCNA)
options(stringsAsFactors = FALSE);
#2.C.1 Choose the soft-threshold power: analysis of network topology
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold (datExpr0,powerVector = powers, verbose = 5)
sizeGrWindow(9,5)
#Change the number (1,2) to (1,1) so data is clear/not crowded
par(mfrow= c(1,1));
cex1 = 0.9;
#Create the scale plot and the mean connectivity plot
#Scale-free topology fit index as a function of soft-threshold power: Scale Indepence Plot
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], xlab = "Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n", main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red");
abline(h=0.90, cool="red")
#Mean connectivity Plot
plot(sft$fitIndices[,1], sft$fitIndices[,5], xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n", main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
#2.C.2 Block-wise network construction and module detection
#Pre-cluster genes by setting the maximum blockwise 
bwnet = blockwiseModules(datExpr, maxBlockSize = 2000, power = 6, TOMType = "unsigned", minModuleSize = 30, reassignThreshold = 0, mergeCutHeight = 0.25, numericLabels = TRUE, saveTOMs = TRUE, saveTOMFileBase = "femaleMouseTOM-blockwise", verbose = 3)
#Load the results of single-block analysisload
(file = "FemaleLiver-02-networkConstruction-auto.RData");
#Relabel blockwise modules
bwLabels = matchLabels(bwnet$colors, moduleLabels);
#Labels to colors for plotting
bwModuleColors = labels2colors(bwLabels)
table(bwLabels)
sizeGrWindow(6,6)
#Plot the dendrogram and the module colors underneath for block 1
plotDendroAndColors(bwnet$dendrograms[[1]], bwModuleColors[bwnet$blockGenes[[1]]],"Module colors", main = "Gene dendrogram and module colors in block 1",dendroLabels = FALSE, hang = 0.03,addGuide = TRUE, guideHang = 0.05)
# Plot the dendrogram and the module colors underneath for block 2
plotDendroAndColors(bwnet$dendrograms[[2]], bwModuleColors[bwnet$blockGenes[[2]]],"Module colors", main = "Gene dendrogram and module colors in block 2", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)
#2.C.3 Comapre the single block to the block-wise network analysis 
sizeGrWindow(12,9)
plotDendroAndColors(geneTree,cbind(moduleColors, bwModuleColors), c("Single block", "2 blocks"), main = "Single block gene dendrogram and module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE,guideHang = 0.05)
#Verify epigenegenes of modules in both approaches are extremely similar
#First calculate the module eigengenes for first approches
singleBlockMEs = moduleEigengenes(datExpr, moduleColors)$eigengenes;
blockwiseMEs = moduleEigengenes(datExpr, bwModuleColors)$eigengenes;
#Now match the eigengens by name and calcualte the correlation of the corresponding eigengenes
single2blockwise = match(names(singleBlockMEs), names(blockwiseMEs))
signif(diag(cor(blockwiseMEs[, single2blockwise], singleBlockMEs)), 3)

#3. RELATING MODULES TO EXTERNAL CLININCAL TRAITS AND IDENTIFY IMPORTANT GENES
getwd()
workingDir = "."; 
setwd("C://Users//amayr//Desktop//FemaleLiver-Data");
library(WGCNA)
options(stringsAsFactors = FALSE);
#load network date in parts one and two
lnames = load(file="FemaleLiver-01-dataInput.RData")
lnames
lnames = load(file = "FemaleLiver-02-networkConstruction-auto.RData");
lnames
#3.A Quantifying module-trait associations

```

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.