MEGENA pipeline for coexpression network analysis

Make sure the following modules are loaded in your HPC cluster: ml R openssl udunits

Install MEGENA into current personal folder

#install.packages("MEGENA",lib=".")
#install.packages("DGCA",lib=".")

Load lastest MEGENA installation from personal folder

library(MEGENA,lib.loc="/sc/arion/projects/DADisorders/")

Load in normalized counts as data frame

Use VST transformed read counts (rows are genes,columns are subjects)

#count.path="/sc/hydra/work/rompag01/MEGENA/MEGENA_counts.csv"
#counts <- read.csv(count.path,header=T,row.names=1)

Computation and module parameters

n.cores <- 15; # number of cores to be used for parallelization (need to MATCH those requested in your LSF).
doPar <- TRUE; # TRUE = perform parallel PFN -> MCA. FALSE = no parallelization
method = "pearson" # Currently “pearson” (Pearson’s correlation) and “spearman” (Spearman’s correlation available) for correlation.
FDR <- 0.05; # FDR threshold to identify significant interactions.
n.cor.perm = 100; # Number of permutations in correlation screening.
n.hub.perm = 100; # Number of permutations to calculate hub significance in MHA
mod.pval = 0.05; # module p-value threshold in MCA
hub.pval = 0.05; # hub p-value threshold in MHA
min.size = 10 # minimum module size
max.size = NULL # maximum module size
# annotation to be done on the downstream
annot.table=NULL
id.col = 1
symbol.col= 2

Generate Correlation Matrix

ijw <- calculate.correlation(counts,doPerm = n.cor.perm,output.corTable = FALSE,output.permFDR = FALSE)

Register multiple cores if needed: note that set.parallel.backend() is deprecated.

run.par = doPar & (getDoParWorkers() == 1) 
if (run.par)
{
  cl <- parallel::makeCluster(n.cores)
  registerDoParallel(cl)
  # check how many workers are there
  cat(paste("number of cores to use:",getDoParWorkers(),"\n",sep = ""))
}

Major Heavy Memory Calculation (will take a lot of time)

# Compute PFN
el <- calculate.PFN(ijw[,1:3],doPar = doPar,num.cores = n.cores)
# graph dataframe
g <- graph.data.frame(el,directed = FALSE)

## perform MCA clustering.
MEGENA.output <- do.MEGENA(g,
                           mod.pval = mod.pval,hub.pval = hub.pval,remove.unsig = TRUE,
                           min.size = 10,max.size = vcount(g)/2,
                           doPar = doPar,num.cores = n.cores,n.perm = n.hub.perm,
                           save.output = FALSE)

# unregister cores as these are not needed anymore.
if (getDoParWorkers() > 1)
{
  env <- foreach:::.foreachGlobals
  rm(list=ls(name=env), pos=env)
}

Summary of MEGENA results

summary.output <- MEGENA.ModuleSummary(MEGENA.output,
                                       mod.pvalue = mod.pval,hub.pvalue = hub.pval,
                                       min.size = min.size,max.size = vcount(g)/2,
                                       annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
                                       output.sig = TRUE)

module.output <- module_convert_to_table(MEGENA.output,mod.pval = 0.05,
                                         hub.pval = 0.05,min.size = 10,max.size=vcount(g)/2)

save(summary.output,MEGENA.output,module.output,g,file="MEGENA.Results.RData")

Gene Ontology Enrichment Analysis for Each Module

# load necessary packages
library(GOstats, quietly = TRUE)
library(HGNChelper, quietly = TRUE)
library(org.Hs.eg.db, quietly = TRUE)
library(DGCA,lib.loc="/sc/arion/projects/DADisorders/")

# run ontology using vectors of gene symbols and module IDs
moduleGO_res = moduleGO(
  genes = module.output$id, 
  labels = module.output$module,
  universe = rownames(counts), 
  pval_GO_cutoff = 0.05)

ontology = extractModuleGO(moduleGO_res)

ontology <- ontology[,-length(colnames(ontology))]

### Filter top gene ontology for each module


pvalues <- ontology[,c(1:4,grep("pVal",colnames(ontology)))]

## Get p top Ontology terms output from MEGENA


## Reaad in ontology p value file if needed

#ontology <- read.csv("module_ontology_pvalues.csv",header=T)

## make it just whatever process you care about (e.g. biological process)

pvalues <- pvalues[pvalues$Ontology=="BP",]
modules <- colnames(pvalues[5:length(colnames(pvalues))])
top.ontology <- data.frame()
for (module in modules) {
  temps <- pvalues[order(pvalues[module]),]
  temp <- temps[1,1:4]
  x <- temps[module][[1]]
  x <- x[1]
  temp$module <- module
  temp$pval <- x
  top.ontology <- rbind(top.ontology,temp)
}
top.ontology$module <- unique(module.output$module)
write.csv(top.ontology,file="top.ontology.per.module.csv")

Compute modules enriched for DEGs


### Generate List of Module Lists and determine enrichment of DEGs
### Need module.IDs and two column "modules" dataframe with module and genes
modules <- module.output[,c(1,6)]
module_list = list()
for (module in modules$module){
  k <- modules[modules$module==module,]
  ids <- as.vector(k$id)
  module_list[[module]] <- ids
}
# load in GeneOverlap 
library(GeneOverlap)
## DEGs is a list of your DEGs
## DEGs should only be genes in your modules
# DEGs <- list()
degs <- c("Gene1","Gene2","etc.")
#DEGs[["DEGs"]] <- degs
total.genes <- 13048 ## Make this the total number of genes across all modules
Object <- newGOM(DEGs,module_list,total.genes)
overlap.intersect <- getMatrix(Object, name="intersection")
overlap.pval <- getMatrix(Object, name="pval")
overlap.OR <- getMatrix(Object, name="odds.ratio")
Overlap.sum <- cbind(overlap.intersect,overlap.pval,overlap.OR)

#head(Overlap.sum)
#        overlap.intersect overlap.pval overlap.OR
#c1_2.ok                 0 1.0000000000    0.00000
#c1_3.ok                 1 0.0473062048   24.20475

Overlap.sum <- data.frame(Overlap.sum)
write.csv(Overlap.sum,file="Enrichment.for.DEGs.in.Modules.csv")

Visualize significant modules

# Compile module stats
load("example.RData") # features top.ontology and Overlap.sum objects
SUM <- cbind(as.character(top.ontology$module),as.numeric(top.ontology$Size),as.character(top.ontology$Term),Overlap.sum$overlap.pval,as.numeric(Overlap.sum$overlap.OR))
SUM <- data.frame(SUM)
colnames(SUM) <- c("module","size","term","pval","OR")
SUM$pval <- -log10(as.numeric(as.character(SUM$pval)))
SUM$OR <- as.numeric(SUM$OR)
SUM$size <- as.numeric(SUM$size)
SUM$term <- as.character(SUM$term)

library(ggplot2)
ggplot(SUM,aes(x=OR,y=pval,size=size,label=term))+
  geom_point(alpha=.25,shape=21,colour="black",fill="blue",stroke=2)+
  geom_text(check_overlap=FALSE,aes(label=ifelse(pval>3,term,""),size=1))+ #,fontface="bold"))+ #hjust=ifelse(pval>3,.1,.3315),vjust= ifelse(OR<60,.6,-.4),fontface="bold"))+
  scale_size_continuous(range = c(3, 20),breaks=c(10,100,1000))+
  xlim(0,20)+
  ylab("-log10(pvalue)")+
  xlab("Odds Ratio")+
  labs(size="Module Size")+
  theme(
    panel.background = element_rect(colour="black",fill="white"),
    legend.title=element_text(size=20),
    legend.text=element_text(size=18),
    axis.text.x=element_text(size=22,face="bold"),
    axis.text.y=element_text(size=22,face="bold"),
    axis.title.x=element_text(size=22,face="bold"),
    axis.title.y = element_text(size=22,face="bold"))

Determine Module Eigengene-Trait Relationships

library(WGCNA) ## Eigengene calculation uses WGCNA code
head(Modules)
gene module
# ABHD17C   c1_2
#   ABCD1   c1_2
# ALDH3B2   c1_2
#  ABHD11   c1_2
#   ACER2   c1_2
#   ACKR2   c1_2

nSamples <-   ## Number of subjects
  moduleColors <- Modules$module ## A vector of module ids corresponding to every gene in counts (note counts need to be genes in columns, subjects in rows)
## Will need counts dataframe to feature column for gene corresponding to each module it is in
expression.table <- counts[,as.vector(Modules$gene)] ## counts should already be transposed before this
MEs0 = moduleEigengenes(expression.table, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, metadata, use = "p",method="spearman") ## Probably use spearman correlation to control for outliers, both MEs and metadata uses subjects for rows
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
adjusted.p <- data.frame(1:10) ## long winded way of getting FDR-adjusted pvalues
for (i in colnames(moduleTraitPvalue)){
  adjust <- p.adjust(moduleTraitPvalue[[i]],method="BH") ## can choose other methods like Bonferroni
  adjusted.p <- cbind(adjusted.p,adjust)
}
adjusted.p <- adjusted.p[,-1]
colnames(adjusted.p) <- colnames(moduleTraitPvalue)

Determine Modules differentially connected using DGCA

counts <- counts ## Normalized counts file
#head(m)
#Control Treatment
#S_1         0         1
#S_10        0         1
#S_100       0         1
#S_103       0         1
#S_106       0         1
#S_107       0         1

metas = data.matrix(m)   ## metas is a column for your group with a 0 or 1

#Control <- ""  ## Use name of column
#Treatment <- "" ## Use name of column
compare = c("Control","Treatment")

modules <-  # module two column file one for gene, one for module
  #head(modules)
  #   gene module
  #ABHD17C   c1_2
  #   ABCD1   c1_2
  # ALDH3B2   c1_2
  #  ABHD11   c1_2
  #   ACER2   c1_2
  #   ACKR2   c1_2
  
  library(DGCA,lib.loc="/sc/arion/projects/DADisorders")
diff.connect <- moduleDC(counts, metas, compare, modules$gene, modules$module, 
                         corr_cutoff = 0.99,
                         signType = "none", 
                         corrType = "pearson", 
                         nPerms = 50,
                         oneSidedPVal = FALSE, 
                         gene_avg_signif = 0.05, 
                         number_DC_genes = 3,
                         dCorAvgMethod = "median")

write.csv(diff.connect,file="Differential.module.connectivity.analysis.csv")

Submitting the script in Minerva via LSF

## MEGENA

#BSUB -L /bin/bash
#BSUB -n 15
#BSUB -R span[ptile=14]
#BSUB -R rusage[mem=4500]
#BSUB -J JOBID
#BSUB -o JOBID.out
#BSUB -e JOBID.err
#BSUB -q premium
#BSUB -P acc_DADdisorders
#BSUB -W 96:00

rfile=your_rfile_path # ENTER PATH OF YOUR R FILE

module load R
module load openssl
module load udunits

R CMD BATCH $rfile

Extracting weights of gene-gene relationships for cytoscape

``` head(el) # row col weight # ABHD17C ADAM12 0.9623204 # ADAM12 ALPP 0.9561197 # ADAM12 ALDH3B2 0.9434451

write.csv(el,file=“weights.for.cytoscape.csv) ```

      ### Quick Option for Plotting Modules in R
      ```
      pnet.obj <- plot_module(output = summary.output,PFN = g,subset.module = "comp1_3",
      layout = "kamada.kawai",label.hubs.only = FALSE,
      gene.set = NULL,color.code =  "grey",
      output.plot = FALSE,out.dir = "modulePlot",col.names = c("magenta","green","cyan"),label.scaleFactor = 2,
      hubLabel.col = "black",hubLabel.sizeProp = 1,show.topn.hubs = Inf,show.legend = TRUE)
      
      ```