dir.data <- "./DATASETS/"
library(dplyr)
library(meta)
results.files <- dir(
dir.data,
pattern = "single_cpg_pVal_df.csv",
recursive = TRUE,
full.names = TRUE,
ignore.case = TRUE
)
for(i in results.files){
data <- readr::read_csv(i)
dataset <- unlist(stringr::str_split(i,"//|/"))[3] %>% as.character()
aux <- paste0(dataset, "_", colnames(data)[-1])
colnames(data) <- c("cpg", aux)
assign(dataset,data)
}
cohort_ls <- list(
Gasparoni = GASPARONI,
London_PFC = LONDON,
MtSinai = MTSINAI,
ROSMAP = ROSMAP
)
### outer join input region
multi_cohorts <- Reduce(
function(x,y, ...) merge(x, y, by = "cpg", all = TRUE, ...),
cohort_ls
)
dim(multi_cohorts)
### calculate meta analysis z scores and p values
doParallel::registerDoParallel(cores = parallel::detectCores()/2)
meta_df <- plyr::adply(
.data = multi_cohorts,
.margins = 1,
.fun = function(rowOne_df){
est <- rowOne_df[grep("Estimate.bacon",colnames(rowOne_df))] %>% as.numeric
direction <- paste(
ifelse(
is.na(est), ".",
ifelse(est > 0, "+", "-")
), collapse = "")
se <- rowOne_df[grep("StdErr.bacon",colnames(rowOne_df))] %>% as.numeric
cohort <- gsub("_StdErr.bacon","",grep("StdErr.bacon",colnames(rowOne_df),value = T))
rowOne_reform_df <- data.frame(
cohort = cohort,
est = est,
se = se,
stringsAsFactors = FALSE
)
f <- metagen(
TE = est,
seTE = se,
data = rowOne_reform_df
)
tibble::tibble(
cpg = rowOne_df$cpg,
estimate.bacon = f$TE.fixed,
se.bacon = f$seTE.fixed,
pVal.fixed.bacon = f$pval.fixed,
pVal.random.bacon = f$pval.random,
pValQ.bacon = f$pval.Q,
direction.bacon = direction
)
} , .progress = "time",
.parallel = TRUE,
.id = NULL
)
### create final pVal
meta_df$pVal.final.bacon <- ifelse(
meta_df$pValQ.bacon > 0.05, meta_df$pVal.fixed.bacon, meta_df$pVal.random.bacon
)
### calculate fdr
meta_df$fdr.bacon <- p.adjust(meta_df$pVal.final.bacon, method = "fdr")
### order meta_df
meta_final_df <- meta_df[, c(grep("_",colnames(meta_df),invert = T),
grep("_",colnames(meta_df),invert = F))
]
meta_final_ordered_df <- meta_final_df[order(meta_final_df$pVal.final.bacon),]
library(sesame)
probes.info <- sesameDataGet("HM450.hg19.manifest")
probes.info <- probes.info[meta_final_ordered_df$cpg %>% as.character()] %>%
as.data.frame %>%
dplyr::select(c("seqnames","start","end"))
result_annot_df <- merge(
y = meta_final_ordered_df,
x = probes.info,
by.y = "cpg",
by.x = "row.names",
all.y = TRUE,
sort = FALSE
)
colnames(result_annot_df)[1] <- "cpg"
### final raw data
write.csv(
result_annot_df %>% as.data.frame(),
file.path(dir.data, "meta_analysis_single_cpg_bacon_df.csv"),
row.names = FALSE
)
### Exclude non-significant probes
result_annot_sig_df <- result_annot_df %>% filter(fdr.bacon < 0.05) #dim:3979 24
write.csv(
result_annot_sig_df %>% as.data.frame(),
paste0(dir.data, "meta_analysis_single_cpg_sig_df_bacon.csv"),
row.names = FALSE
)
### Get crosshybrdizing probes
library(ExperimentHub)
eh = ExperimentHub()
query(eh, "DMRcate")
crosshyb <- eh[["EH3129"]]
### Get significant smoking probes
smoking.file <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5267325/bin/NIHMS817273-supplement-001506_-_Supplemental_Tables.xlsx"
if(!file.exists(basename(smoking.file))) downloader::download(smoking.file,basename(smoking.file))
smoking <- readxl::read_xlsx(
basename(smoking.file),
sheet = "02",
skip = 2
)
smoking.sig.probes <- smoking %>% dplyr::filter(`P-value` < 1*10^(-7)) %>% pull("Probe ID")
### Exclude cross-hybridizing and smoking probes
result_annot_sig_df$cpg <- as.character(result_annot_sig_df$cpg)
result_annot_sig_no_crossHyb_smoking_df <- result_annot_sig_df[
!(result_annot_sig_df$cpg %in% c(crosshyb, smoking.sig.probes)),
] #dim: 3751 28
### Add annotation
library(coMethDMR)
result_annot_sig_no_crossHyb_smoking_df$chrom <- as.character(
result_annot_sig_no_crossHyb_smoking_df$seqnames
)
result_final <- AnnotateResults(result_annot_sig_no_crossHyb_smoking_df)
result_final_ordered <- result_final[
,c(
1:4, 46:49, 5:12,
grep("GASPARONI", colnames(result_final), ignore.case = TRUE, invert = FALSE),
grep("LONDON", colnames(result_final), ignore.case = TRUE, invert = FALSE),
grep("MTSINAI", colnames(result_final), ignore.case = TRUE, invert = FALSE),
grep("ROSMAP", colnames(result_final), ignore.case = TRUE, invert = FALSE)
)
]
write.csv(
result_final_ordered %>% as.data.frame(),
paste0(dir.data, "meta_analysis_single_cpg_sig_no_crossHyb_smoking_df_bacon.csv"),
row.names = FALSE
)
result_annot_sig_df_2_4_minus_7 <- result_annot_df %>% filter(pVal.final.bacon < 2.4e-07)
write.csv(
result_annot_sig_df_2_4_minus_7 %>% as.data.frame(),
paste0(dir.data, "meta_analysis_single_cpg_sig_df_bacon_2_4_minus_7.csv"),
row.names = FALSE
)
### Exclude cross-hybridizing and smoking probes
result_annot_sig_df_2_4_minus_7$cpg <- as.character(result_annot_sig_df_2_4_minus_7$cpg)
result_annot_sig_no_crossHyb_smoking_df_2_4_minus_7 <- result_annot_sig_df_2_4_minus_7[
!(result_annot_sig_df_2_4_minus_7$cpg %in% c(crosshyb, smoking.sig.probes)),
] #dim: 3751 28
### Add annotation
library(coMethDMR)
result_annot_sig_no_crossHyb_smoking_df_2_4_minus_7$chrom <- as.character(
result_annot_sig_no_crossHyb_smoking_df_2_4_minus_7$seqnames
)
result_final_2_4_minus_7 <- AnnotateResults(result_annot_sig_no_crossHyb_smoking_df_2_4_minus_7)
result_final_ordered_2_4_minus_7<- result_final_2_4_minus_7[
,c(
1:4, 46:49, 5:12,
grep("GASPARONI", colnames(result_final_2_4_minus_7), ignore.case = TRUE, invert = FALSE),
grep("LONDON", colnames(result_final_2_4_minus_7), ignore.case = TRUE, invert = FALSE),
grep("MTSINAI", colnames(result_final_2_4_minus_7), ignore.case = TRUE, invert = FALSE),
grep("ROSMAP", colnames(result_final_2_4_minus_7), ignore.case = TRUE, invert = FALSE)
)
]
write.csv(
result_final_ordered_2_4_minus_7 %>% as.data.frame(),
paste0(dir.data, "meta_analysis_single_cpg_sig_no_crossHyb_smoking_df_bacon_2_4_minus_7.csv"),
row.names = FALSE
)
readr::read_csv(
paste0(dir.data, "meta_analysis_single_cpg_sig_no_crossHyb_smoking_df_bacon.csv"),
col_types = readr::cols()
)
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 4.0.2 (2020-06-22)
## os macOS Catalina 10.15.6
## system x86_64, darwin17.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-08-11
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib source
## assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.0.2)
## backports 1.1.8 2020-06-17 [1] CRAN (R 4.0.2)
## base64enc 0.1-3 2015-07-28 [1] CRAN (R 4.0.2)
## boot 1.3-25 2020-04-26 [1] CRAN (R 4.0.2)
## callr 3.4.3 2020-03-28 [1] CRAN (R 4.0.2)
## cli 2.0.2 2020-02-28 [1] CRAN (R 4.0.2)
## CompQuadForm 1.4.3 2017-04-12 [1] CRAN (R 4.0.2)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 4.0.2)
## desc 1.2.0 2018-05-01 [1] CRAN (R 4.0.2)
## devtools 2.3.1 2020-07-21 [1] CRAN (R 4.0.2)
## digest 0.6.25 2020-02-23 [1] CRAN (R 4.0.2)
## dplyr * 1.0.1 2020-07-31 [1] CRAN (R 4.0.2)
## ellipsis 0.3.1 2020-05-15 [1] CRAN (R 4.0.2)
## evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.1)
## fansi 0.4.1 2020-01-08 [1] CRAN (R 4.0.2)
## fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.2)
## generics 0.0.2 2018-11-29 [1] CRAN (R 4.0.2)
## glue 1.4.1 2020-05-13 [1] CRAN (R 4.0.2)
## hms 0.5.3 2020-01-08 [1] CRAN (R 4.0.2)
## htmltools 0.5.0 2020-06-16 [1] CRAN (R 4.0.2)
## jsonlite 1.7.0 2020-06-25 [1] CRAN (R 4.0.2)
## knitr 1.29 2020-06-23 [1] CRAN (R 4.0.2)
## lattice 0.20-41 2020-04-02 [1] CRAN (R 4.0.2)
## lifecycle 0.2.0 2020-03-06 [1] CRAN (R 4.0.2)
## lme4 1.1-23 2020-04-07 [1] CRAN (R 4.0.1)
## magrittr 1.5 2014-11-22 [1] CRAN (R 4.0.2)
## MASS 7.3-51.6 2020-04-26 [1] CRAN (R 4.0.2)
## Matrix 1.2-18 2019-11-27 [1] CRAN (R 4.0.2)
## memoise 1.1.0 2017-04-21 [1] CRAN (R 4.0.2)
## meta * 4.13-0 2020-07-03 [1] CRAN (R 4.0.2)
## metafor 2.4-0 2020-03-19 [1] CRAN (R 4.0.2)
## minqa 1.2.4 2014-10-09 [1] CRAN (R 4.0.2)
## nlme 3.1-148 2020-05-24 [1] CRAN (R 4.0.2)
## nloptr 1.2.2.2 2020-07-02 [1] CRAN (R 4.0.2)
## pillar 1.4.6 2020-07-10 [1] CRAN (R 4.0.2)
## pkgbuild 1.1.0 2020-07-13 [1] CRAN (R 4.0.2)
## pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.2)
## pkgload 1.1.0 2020-05-29 [1] CRAN (R 4.0.2)
## prettyunits 1.1.1 2020-01-24 [1] CRAN (R 4.0.2)
## processx 3.4.3 2020-07-05 [1] CRAN (R 4.0.2)
## ps 1.3.3 2020-05-08 [1] CRAN (R 4.0.2)
## purrr 0.3.4 2020-04-17 [1] CRAN (R 4.0.2)
## R6 2.4.1 2019-11-12 [1] CRAN (R 4.0.2)
## Rcpp 1.0.5 2020-07-06 [1] CRAN (R 4.0.2)
## readr 1.3.1 2018-12-21 [1] CRAN (R 4.0.2)
## remotes 2.2.0 2020-07-21 [1] CRAN (R 4.0.2)
## rlang 0.4.7 2020-07-09 [1] CRAN (R 4.0.2)
## rmarkdown 2.3 2020-06-18 [1] CRAN (R 4.0.2)
## rprojroot 1.3-2 2018-01-03 [1] CRAN (R 4.0.2)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.2)
## statmod 1.4.34 2020-02-17 [1] CRAN (R 4.0.2)
## stringi 1.4.6 2020-02-17 [1] CRAN (R 4.0.2)
## stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.2)
## testthat 2.3.2 2020-03-02 [1] CRAN (R 4.0.2)
## tibble 3.0.3 2020-07-10 [1] CRAN (R 4.0.2)
## tidyselect 1.1.0 2020-05-11 [1] CRAN (R 4.0.2)
## usethis 1.6.1 2020-04-29 [1] CRAN (R 4.0.2)
## vctrs 0.3.2 2020-07-15 [1] CRAN (R 4.0.2)
## withr 2.2.0 2020-04-20 [1] CRAN (R 4.0.2)
## xfun 0.16 2020-07-24 [1] CRAN (R 4.0.2)
## yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.2)
##
## [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library