Assess mediating roles of genes or network eigengenes on relationship between independent and dependent variable
suppressPackageStartupMessages(library("mediation"))
suppressPackageStartupMessages(library("stringr"))
Load in your IV, DV (dichotomous),Mediator,Covariates and Normalized Gene Counts as one data frame
#Load in a data frame containing your DEGs and filter accordingly
#Here I load in a deg data frame and then filter for FDR adj.p<0.05
setwd("C:/Users/grrompala/Documents")
deg <- read.csv("../Desktop/Sandy September revisions/sandy.deg.csv",header=T,row.names=1)
head(deg)
## Gene.Type FPKM log2FC p.value FDR.adj..p.value
## RN7SL4P misc_RNA 6.345208 -1.5146400 3.65782e-09 5.99700e-05
## LINC01816 lncRNA 2.974602 -1.0314123 1.04512e-08 8.56741e-05
## RN7SL2 misc_RNA 4455.872103 -0.9486606 3.41892e-08 1.68100e-04
## FBLN1 protein_coding 743.341090 -1.1143737 4.10124e-08 1.68100e-04
## EVA1B protein_coding 94.053261 -0.7098410 6.60749e-08 2.16660e-04
## MUC15 protein_coding 32.900744 -1.3807998 1.50885e-07 2.86004e-04
deg <- deg[deg$FDR.adj..p.value<0.05,] # Grab DEGs based on criteria
deg <- rownames(deg) # Take DEG names as vector
meta <- read.csv("../Desktop/Sandy September revisions/Metadata.09032020.csv",
header=T,row.name=1)
counts <- read.csv("../Desktop/Sandy September revisions/VST.csv",header=T,row.names=1)
meta <- meta[is.na(meta$Anx_T)==F,]
meta$STAI_TTO[is.na(meta$STAI_TTO)==T] <- mean(meta$STAI_TTO,na.rm=T) ## Remove all NA values and replace with means!
counts <- counts[,rownames(meta)]
rownames(counts) <- str_replace_all(rownames(counts),"-",".") ## glm and lm don't like "-" in gene names
total <- cbind(meta,t(counts))
total$Anx60 <- ifelse(total$Anx_T>59,1,0) # Prepare your dicotomoized
head(total[5:10])
## Dep_T Som_T Atyp_T With_T Attn_T Cortisol
## S_100 68 54 48 68 44 20.36
## S_106 41 38 39 42 41 NA
## S_107 46 38 45 48 55 24.53
## S_108 50 60 40 55 49 118.91
## S_109 49 51 45 50 52 250.00
## S_10Q 48 38 43 43 69 250.00
deg <- str_replace_all(deg,"-",".") ## glm and lm don't like "-" in gene names
deg <- deg[deg %in% colnames(total)]
ACME.bank <- data.frame()
ADE.bank <- data.frame()
Total.Effect.bank <- data.frame()
Prop.Mediated.bank <- data.frame()
for (DEG in deg){
gene <- colnames(total)[colnames(total)==DEG]
formula1 <- paste(gene,"~CSEX+STAI_TTO+MOB_AGE+MJtotal2020+smoking2020+SANDY",sep="")
SANDY.GENE <- lm(formula1,total)
formula2 <- paste("Anx60~CSEX+STAI_TTO+MOB_AGE+MJtotal2020+smoking2020+SANDY+",gene,sep="")
GENE.ANX=glm(formula2,total,family="binomial")
results = mediate(SANDY.GENE,GENE.ANX,
treat='SANDY', mediator=gene,boot=T,sims=1000)
temp <- extract_mediation_summary(results)
colnames(temp) <- c("estimate","CI.low","CI.high","pval")
ACME <- temp[8,]
ACME$gene <- DEGE
ADE <- temp[9,]
ADE$gene <- DEGE
Total.Effect <- temp[5,]
Total.Effect$gene <- DEGE
Prop.Mediated <- temp[10,]
Prop.Mediated$gene <- DEGE
ACME.bank <- rbind(ACME.bank,ACME)
ADE.bank <- rbind(ADE.bank,ADE)
Total.Effect.bank <- rbind(Total.Effect.bank,Total.Effect)
Prop.Mediated.bank <- rbind(Prop.Mediated.bank,Prop.Mediated)
}
ACME is the mediation effect
ACDE is the effect of the IV controlling for the mediator
Total Effect is the effect of IV on DV
Prop.Mediated is similar to ACME just as a percentage of Total Effect