Make sure the following modules are loaded in your HPC cluster: ml R openssl udunits
#install.packages("MEGENA",lib=".")
#install.packages("DGCA",lib=".")
library(MEGENA,lib.loc="/sc/arion/projects/DADisorders/")
Use VST transformed read counts (rows are genes,columns are subjects)
#count.path="/sc/hydra/work/rompag01/MEGENA/MEGENA_counts.csv"
#counts <- read.csv(count.path,header=T,row.names=1)
n.cores <- 15; # number of cores to be used for parallelization (need to MATCH those requested in your LSF).
doPar <- TRUE; # TRUE = perform parallel PFN -> MCA. FALSE = no parallelization
method = "pearson" # Currently “pearson” (Pearson’s correlation) and “spearman” (Spearman’s correlation available) for correlation.
FDR <- 0.05; # FDR threshold to identify significant interactions.
n.cor.perm = 100; # Number of permutations in correlation screening.
n.hub.perm = 100; # Number of permutations to calculate hub significance in MHA
mod.pval = 0.05; # module p-value threshold in MCA
hub.pval = 0.05; # hub p-value threshold in MHA
min.size = 10 # minimum module size
max.size = NULL # maximum module size
# annotation to be done on the downstream
annot.table=NULL
id.col = 1
symbol.col= 2
ijw <- calculate.correlation(counts,doPerm = n.cor.perm,output.corTable = FALSE,output.permFDR = FALSE)
run.par = doPar & (getDoParWorkers() == 1)
if (run.par)
{
cl <- parallel::makeCluster(n.cores)
registerDoParallel(cl)
# check how many workers are there
cat(paste("number of cores to use:",getDoParWorkers(),"\n",sep = ""))
}
# Compute PFN
el <- calculate.PFN(ijw[,1:3],doPar = doPar,num.cores = n.cores)
# graph dataframe
g <- graph.data.frame(el,directed = FALSE)
## perform MCA clustering.
MEGENA.output <- do.MEGENA(g,
mod.pval = mod.pval,hub.pval = hub.pval,remove.unsig = TRUE,
min.size = 10,max.size = vcount(g)/2,
doPar = doPar,num.cores = n.cores,n.perm = n.hub.perm,
save.output = FALSE)
# unregister cores as these are not needed anymore.
if (getDoParWorkers() > 1)
{
env <- foreach:::.foreachGlobals
rm(list=ls(name=env), pos=env)
}
summary.output <- MEGENA.ModuleSummary(MEGENA.output,
mod.pvalue = mod.pval,hub.pvalue = hub.pval,
min.size = min.size,max.size = vcount(g)/2,
annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
output.sig = TRUE)
module.output <- module_convert_to_table(MEGENA.output,mod.pval = 0.05,
hub.pval = 0.05,min.size = 10,max.size=vcount(g)/2)
save(summary.output,MEGENA.output,module.output,g,file="MEGENA.Results.RData")
# load necessary packages
library(GOstats, quietly = TRUE)
library(HGNChelper, quietly = TRUE)
library(org.Hs.eg.db, quietly = TRUE)
library(DGCA,lib.loc="/sc/arion/projects/DADisorders/")
# run ontology using vectors of gene symbols and module IDs
moduleGO_res = moduleGO(
genes = module.output$id,
labels = module.output$module,
universe = rownames(counts),
pval_GO_cutoff = 0.05)
ontology = extractModuleGO(moduleGO_res)
ontology <- ontology[,-length(colnames(ontology))]
### Filter top gene ontology for each module
pvalues <- ontology[,c(1:4,grep("pVal",colnames(ontology)))]
## Get p top Ontology terms output from MEGENA
## Reaad in ontology p value file if needed
#ontology <- read.csv("module_ontology_pvalues.csv",header=T)
## make it just whatever process you care about (e.g. biological process)
pvalues <- pvalues[pvalues$Ontology=="BP",]
modules <- colnames(pvalues[5:length(colnames(pvalues))])
top.ontology <- data.frame()
for (module in modules) {
temps <- pvalues[order(pvalues[module]),]
temp <- temps[1,1:4]
x <- temps[module][[1]]
x <- x[1]
temp$module <- module
temp$pval <- x
top.ontology <- rbind(top.ontology,temp)
}
top.ontology$module <- unique(module.output$module)
write.csv(top.ontology,file="top.ontology.per.module.csv")
### Generate List of Module Lists and determine enrichment of DEGs
### Need module.IDs and two column "modules" dataframe with module and genes
modules <- module.output[,c(1,6)]
module_list = list()
for (module in modules$module){
k <- modules[modules$module==module,]
ids <- as.vector(k$id)
module_list[[module]] <- ids
}
# load in GeneOverlap
library(GeneOverlap)
## DEGs is a list of your DEGs
## DEGs should only be genes in your modules
# DEGs <- list()
degs <- c("Gene1","Gene2","etc.")
#DEGs[["DEGs"]] <- degs
total.genes <- 13048 ## Make this the total number of genes across all modules
Object <- newGOM(DEGs,module_list,total.genes)
overlap.intersect <- getMatrix(Object, name="intersection")
overlap.pval <- getMatrix(Object, name="pval")
overlap.OR <- getMatrix(Object, name="odds.ratio")
Overlap.sum <- cbind(overlap.intersect,overlap.pval,overlap.OR)
#head(Overlap.sum)
# overlap.intersect overlap.pval overlap.OR
#c1_2.ok 0 1.0000000000 0.00000
#c1_3.ok 1 0.0473062048 24.20475
Overlap.sum <- data.frame(Overlap.sum)
write.csv(Overlap.sum,file="Enrichment.for.DEGs.in.Modules.csv")
# Compile module stats
load("example.RData") # features top.ontology and Overlap.sum objects
SUM <- cbind(as.character(top.ontology$module),as.numeric(top.ontology$Size),as.character(top.ontology$Term),Overlap.sum$overlap.pval,as.numeric(Overlap.sum$overlap.OR))
SUM <- data.frame(SUM)
colnames(SUM) <- c("module","size","term","pval","OR")
SUM$pval <- -log10(as.numeric(as.character(SUM$pval)))
SUM$OR <- as.numeric(SUM$OR)
SUM$size <- as.numeric(SUM$size)
SUM$term <- as.character(SUM$term)
library(ggplot2)
ggplot(SUM,aes(x=OR,y=pval,size=size,label=term))+
geom_point(alpha=.25,shape=21,colour="black",fill="blue",stroke=2)+
geom_text(check_overlap=FALSE,aes(label=ifelse(pval>3,term,""),size=1))+ #,fontface="bold"))+ #hjust=ifelse(pval>3,.1,.3315),vjust= ifelse(OR<60,.6,-.4),fontface="bold"))+
scale_size_continuous(range = c(3, 20),breaks=c(10,100,1000))+
xlim(0,20)+
ylab("-log10(pvalue)")+
xlab("Odds Ratio")+
labs(size="Module Size")+
theme(
panel.background = element_rect(colour="black",fill="white"),
legend.title=element_text(size=20),
legend.text=element_text(size=18),
axis.text.x=element_text(size=22,face="bold"),
axis.text.y=element_text(size=22,face="bold"),
axis.title.x=element_text(size=22,face="bold"),
axis.title.y = element_text(size=22,face="bold"))
library(WGCNA) ## Eigengene calculation uses WGCNA code
head(Modules)
gene module
# ABHD17C c1_2
# ABCD1 c1_2
# ALDH3B2 c1_2
# ABHD11 c1_2
# ACER2 c1_2
# ACKR2 c1_2
nSamples <- ## Number of subjects
moduleColors <- Modules$module ## A vector of module ids corresponding to every gene in counts (note counts need to be genes in columns, subjects in rows)
## Will need counts dataframe to feature column for gene corresponding to each module it is in
expression.table <- counts[,as.vector(Modules$gene)] ## counts should already be transposed before this
MEs0 = moduleEigengenes(expression.table, moduleColors)$eigengenes
MEs = orderMEs(MEs0)
moduleTraitCor = cor(MEs, metadata, use = "p",method="spearman") ## Probably use spearman correlation to control for outliers, both MEs and metadata uses subjects for rows
moduleTraitPvalue = corPvalueStudent(moduleTraitCor, nSamples)
adjusted.p <- data.frame(1:10) ## long winded way of getting FDR-adjusted pvalues
for (i in colnames(moduleTraitPvalue)){
adjust <- p.adjust(moduleTraitPvalue[[i]],method="BH") ## can choose other methods like Bonferroni
adjusted.p <- cbind(adjusted.p,adjust)
}
adjusted.p <- adjusted.p[,-1]
colnames(adjusted.p) <- colnames(moduleTraitPvalue)
counts <- counts ## Normalized counts file
#head(m)
#Control Treatment
#S_1 0 1
#S_10 0 1
#S_100 0 1
#S_103 0 1
#S_106 0 1
#S_107 0 1
metas = data.matrix(m) ## metas is a column for your group with a 0 or 1
#Control <- "" ## Use name of column
#Treatment <- "" ## Use name of column
compare = c("Control","Treatment")
modules <- # module two column file one for gene, one for module
#head(modules)
# gene module
#ABHD17C c1_2
# ABCD1 c1_2
# ALDH3B2 c1_2
# ABHD11 c1_2
# ACER2 c1_2
# ACKR2 c1_2
library(DGCA,lib.loc="/sc/arion/projects/DADisorders")
diff.connect <- moduleDC(counts, metas, compare, modules$gene, modules$module,
corr_cutoff = 0.99,
signType = "none",
corrType = "pearson",
nPerms = 50,
oneSidedPVal = FALSE,
gene_avg_signif = 0.05,
number_DC_genes = 3,
dCorAvgMethod = "median")
write.csv(diff.connect,file="Differential.module.connectivity.analysis.csv")
## MEGENA
#BSUB -L /bin/bash
#BSUB -n 15
#BSUB -R span[ptile=14]
#BSUB -R rusage[mem=4500]
#BSUB -J JOBID
#BSUB -o JOBID.out
#BSUB -e JOBID.err
#BSUB -q premium
#BSUB -P acc_DADdisorders
#BSUB -W 96:00
rfile=your_rfile_path # ENTER PATH OF YOUR R FILE
module load R
module load openssl
module load udunits
R CMD BATCH $rfile
``` head(el) # row col weight # ABHD17C ADAM12 0.9623204 # ADAM12 ALPP 0.9561197 # ADAM12 ALDH3B2 0.9434451
write.csv(el,file=“weights.for.cytoscape.csv) ```
### Quick Option for Plotting Modules in R
```
pnet.obj <- plot_module(output = summary.output,PFN = g,subset.module = "comp1_3",
layout = "kamada.kawai",label.hubs.only = FALSE,
gene.set = NULL,color.code = "grey",
output.plot = FALSE,out.dir = "modulePlot",col.names = c("magenta","green","cyan"),label.scaleFactor = 2,
hubLabel.col = "black",hubLabel.sizeProp = 1,show.topn.hubs = Inf,show.legend = TRUE)
```