#------------------------------------------------------------------------------
# Datasets
# DNAm data – in file “ROSMAP_QNBMIQ_PCfiltered.RDS” at ~\coMethDMR_metaAnalysis\cohort_ROSMAP\data_final
# RNAseq data – in file “ROSMAP_RNAseq_FPKM_gene_plates_1_to_6_normalized.tsv” and “ROSMAP_RNAseq_FPKM_gene_plates_7_to_8_normalized.tsv”
# at ~\coMethDMR_metaAnalysis\DNAm_RNA
# Link for IDs – in file “ROSMAP_IDkey”, column “mwas_id” is for DNAm data, column “rnaseq_id” is for rnaseq
# Phenotype data: ~\coMethDMR_metaAnalysis\cohort_ROSMAP\data_final\pheno721_withNeuronProp_df.RDS
#------------------------------------------------------------------------------
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Libraries
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
library(TCGAbiolinks)
library(dplyr)
library(DelayedMatrixStats)
library(GenomicRanges)
devtools::load_all("/Users/jennyzly/coMethDMR/")
devtools::load_all("/Users/jennyzly/Dropbox (BBSR)/PanCancer/methTF/")
library(doParallel)
registerDoParallel(4)
data.path <- "../code_validation/Meta_analysis_code/DATASETS/ROSMAP"
#------------------------------------------------------------------------------
# 1) RNA-SEQ
# Read and add gene name
#------------------------------------------------------------------------------
# retrieve gene information to map from ID to Symbol
gene.info <- TCGAbiolinks::get.GRCh.bioMart("hg19")
exp.plates_1_to_6 <-
readr::read_tsv("data/ROSMAP_RNAseq_FPKM_gene_plates_1_to_6_normalized.tsv")
exp.plates_1_to_6$geneName <- gene.info$external_gene_name[match(gsub("\\.[0-9]*", "", exp.plates_1_to_6$gene_id),
gene.info$ensembl_gene_id)]
exp.plates_7_to_8 <-
readr::read_tsv("data/ROSMAP_RNAseq_FPKM_gene_plates_7_to_8_normalized.tsv")
exp.plates_7_to_8$geneName <-
gene.info$external_gene_name[match(gsub("\\.[0-9]*", "", exp.plates_7_to_8$gene_id),
gene.info$ensembl_gene_id)]
# Merge data
rna.seq <-
dplyr::full_join(exp.plates_1_to_6, exp.plates_7_to_8) %>%
as.data.frame()
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# DNA methylation
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
dnam.file <- dir(
path = data.path,
pattern = "ROSMAP_QNBMIQ_PCfiltered.RDS",
ignore.case = T,
full.names = T,
recursive = T
)
dnam.file
dna.met <- readRDS(dnam.file)
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Clinical
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
clinical <-
readr::read_csv("ROSMAP_clinical.csv", col_types = readr::cols())
pheno.file <- dir(
path = data.path,
pattern = "withNeuronProp",
ignore.case = T,
full.names = T,
recursive = T
)
pheno.file
phenotype <- readRDS(pheno.file)
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
# Map RNA-seq and DNA methylation
#-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
map <- readr::read_csv("data/ROSMAP_IDkey.csv")
# Only keep samples with DNA methytlation and gene expression
map <- na.omit(unique(map[, c("mwas_id", "rnaseq_id")]))
map <- map[map$mwas_id %in% colnames(dna.met) &
map$rnaseq_id %in% gsub("_[0-9]$", "", c(colnames(rna.seq))),]
dna.met <-
dna.met[, colnames(dna.met) %in% map$mwas_id] # Samples 529
matched.exp <- rna.seq[, c(2,
match(map$rnaseq_id, gsub("_[0-9]$", "", c(colnames(
rna.seq
)))))]
rownames(matched.exp) <- gsub("\\.[0-9]*$", "", matched.exp$gene_id)
matched.exp$gene_id <- NULL
matched.dnam <- dna.met[, match(map$mwas_id, colnames(dna.met))]
matched.phenotype <-
phenotype[match(map$mwas_id , phenotype$Sample),]
#----------------- CHECKS ----------------------
table(is.na(matched.phenotype$Sample))
nrow(matched.phenotype) == ncol(matched.dnam)
nrow(matched.phenotype) == ncol(matched.exp)
#-------------------------------------------
matched.phenotype$rnaseq_id <-
map$rnaseq_id[match(matched.phenotype$Sample, map$mwas_id)]
#------------------------------------------------------------------------------
# Cell type markers
#------------------------------------------------------------------------------
genes <- c("ENO2", "OLIG2", "CD34", "CD68", "GFAP")
gene.ensg <-
gene.info$ensembl_gene_id[match(genes, gene.info$external_gene_name)]
cell.type.markers <-
matched.exp[match(gene.ensg, rownames(matched.exp)),] %>% t
colnames(cell.type.markers) <- genes
cell.type.markers <-
cell.type.markers %>% tibble::as_tibble(rownames = "rnaseq_id")
cell.type.markers$Sample <-
map$mwas_id[match(gsub("_[0-9]$", "", cell.type.markers$rnaseq_id),
map$rnaseq_id)]
cell.type.markers <-
cell.type.markers[!is.na(cell.type.markers$Sample),]
saveRDS(cell.type.markers, file = "data/cell.type.markers.Rds")
save(matched.phenotype,
matched.dnam,
matched.exp,
map,
file = "data/matched_data.rda")
meta.analysis.folder <- "../code_validation/Meta_analysis_code/meta_analysis_region_results/"
# focus on significant methylation regions with FDR < 0.05 in file “meta_analysis_ALL_df.csv”
analysis <- readr::read_csv(
file.path(meta.analysis.folder,"step1_meta_analysis/meta_analysis_ALL_df.csv"),
col_types = readr::cols()
)
region.analysis <- readr::read_csv(
file.path(meta.analysis.folder,"step4_dmr_vs_cpgs/meta_analysis_sig_no_crossHyb_smoking_ov_comb_p_with_sig_single_cpgs.csv"),
col_types = readr::cols()
)
dmr <- region.analysis %>% filter(!is.na(ROSMAP_coMethRegion))
dim(dmr)
## [1] 118 67
dmr.hypo <- region.analysis %>% filter(estimate < 0)
dim(dmr.hypo)
## [1] 31 67
dmr.hyper <- region.analysis %>% filter(estimate > 0)
dim(dmr.hyper)
## [1] 88 67
For each significant co-methylated region, we looked for genes within \(250Kbp\), removed confounding effects in gene expression and DNA methylation levels, and then correlated DNAm with gene expression with the following model, for cases and controls separately:
\[rna_{residual} \sim met_{residual} \]
\(Median(\text{DNAm m-values in DMR}) \sim celltype.proportion + batch + \text{sample plate} + ageAtDeath + sex => \text{DNAm residuals}\)
file <- "results/ROSMAP.probes.all.meta.analysis.regions.rda"
if(!file.exists(file)){
regions.name <- na.omit(results.meta.analysis.region$ROSMAP_coMethRegion) %>% as.character() %>% unique()
probes.all.regions <- GetCpGsInAllRegion(regions.name, progress = TRUE, nCores_int = 10)
save(probes.all.regions, file = file)
} else {
load(file)
}
median.met <- plyr::ldply(
probes.all.regions[unique(dmr$ROSMAP_coMethRegion)],
function(probes){
aux <- colMedians(matched.dnam,rows = rownames(matched.dnam) %in% probes)
aux
},
.progress = "time",
.parallel = TRUE,
.inform = TRUE,
.id = "region"
)
rownames(median.met) <- median.met$region %>% as.character()
median.met$region <- NULL
colnames(median.met) <- colnames(matched.dnam)
# 1) remove confounding effects in DNAm data:
resid_met <- GetResiduals(
dnam = median.met,
betaToM = TRUE, #converts to Mvalues for fitting linear model
pheno_df = matched.phenotype,
covariates_char = c("Sample_Plate", "prop.neuron", "batch","msex","age_death"),
nCores_int = 1,
progressbar = TRUE
)
\(log2(RNA) \sim ageAtDeath + sex + \text{markers for cell types} => \text{RNA residuals}\)
matched.exp.log2 <- log2(matched.exp + 1)
markers <-
t(matched.exp.log2[c(
"ENSG00000111674",
"ENSG00000129226",
"ENSG00000131095",
"ENSG00000205927",
"ENSG00000174059"
), ])
colnames(markers) <- c("markers_ENO2",
"markers_OLIG2",
"markers_CD34",
"markers_CD68",
"markers_GFAP")
matched.phenotype$rnaseq_id <- map$rnaseq_id[match(matched.phenotype$Sample,map$mwas_id)]
resid_exp <- plyr::adply(.data = matched.exp.log2,
.margins = 1,
function(row){
val <- t(row)
colnames(val) <- "val"
dat <- cbind(val,
matched.phenotype,
markers
)
dat$val <- as.numeric(dat$val)
fitE <- lm("val ~ age_death + msex + markers_ENO2 + markers_OLIG2 + markers_CD34 + markers_CD68 + markers_GFAP",
data = dat,
na.action = na.exclude)
residuals(fitE)
}, .progress = "time",
.parallel = TRUE)
rownames(resid_exp) <- rownames(matched.exp.log2)
save(resid_exp,
resid_met,
file = "data/residuals.rda")
The function getDNAm.target
will extend the regions \(+-250Kbp\) and return the overlapping genes.
regions.gr <- rownames(resid_met) %>%
as.data.frame %>%
tidyr::separate(col = ".",into = c("chr","start","end")) %>%
makeGRangesFromDataFrame()
names(regions.gr) <- rownames(resid_met)
regions.gr
## GRanges object with 118 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## chr19:49220102-49220235 chr19 49220102-49220235 *
## chr7:27153580-27153847 chr7 27153580-27153847 *
## chr7:27146237-27146445 chr7 27146237-27146445 *
## chr7:27154720-27155548 chr7 27154720-27155548 *
## chr7:27179268-27179432 chr7 27179268-27179432 *
## ... ... ... ...
## chr6:25042495-25042548 chr6 25042495-25042548 *
## chr6:32906460-32906734 chr6 32906460-32906734 *
## chr12:120835663-120835778 chr12 120835663-120835778 *
## chr22:37608611-37608819 chr22 37608611-37608819 *
## chr16:87886871-87886933 chr16 87886871-87886933 *
## -------
## seqinfo: 22 sequences from an unspecified genome; no seqlengths
# function available in methTF package https://gitlab.com/tiagochst/methtf
regions.genes <- get_region_target_gene(regions.gr = regions.gr,
genome = "hg19",
method = "window",
window.width = 500 * 10 ^ 3) # 500 kb
regions.genes <- regions.genes %>%
dplyr::filter(regions.genes$gene_ensembl_gene_id %in% rownames(resid_exp))
dim(regions.genes)
## [1] 2054 11
head(regions.genes)
Linear models:
# http://www.r-tutor.com/elementary-statistics/simple-linear-regression/residual-plot
tab <- plyr::adply(
regions.genes,
.margins = 1,
.fun = function(row) {
tryCatch({
rna.target <-
resid_exp[rownames(resid_exp) == row$gene_ensembl_gene_id, , drop = FALSE]
met.residual <- resid_met[rownames(resid_met) == as.character(row$regionID), ]
df <- data.frame(
rna.residual = rna.target %>% as.numeric,
met.residual = met.residual %>% as.numeric,
Braak_stage = matched.phenotype$braaksc,
group = ifelse(matched.phenotype$braaksc < 3, "Control", "Case")
)
# 2) fit linear model to cases and controls seperately:
# For cases (Braak stage 3-6)
# RNA_residuals ~ DNAm_residuals
results.cases <- lm (
rna.residual ~ met.residual,
data = df[df$Braak_stage > 2, ]
)
results.cases.pval <- summary(results.cases)$coefficients[-1, 4, drop = F] %>% t %>% as.data.frame()
results.cases.estimate <- summary(results.cases)$coefficients[-1, 1, drop = F] %>% t %>% as.data.frame()
colnames(results.cases.pval) <- paste0("cases_pval_", colnames(results.cases.pval))
colnames(results.cases.estimate) <- paste0("cases_estimate_", colnames(results.cases.estimate))
# For controls (Braak stage 0 -2)
# RNA_residuals ~ DNAm_residuals
results.control <- lm (
rna.residual ~ met.residual,
data = df[df$Braak_stage < 3, ]
)
results.control.pval <- summary(results.control)$coefficients[-1, 4, drop = F] %>% t %>% as.data.frame()
results.control.estimate <- summary(results.control)$coefficients[-1, 1, drop = F] %>% t %>% as.data.frame()
colnames(results.control.pval) <- paste0("control_pval_", colnames(results.control.pval))
colnames(results.control.estimate) <- paste0("control_estimate_", colnames(results.control.estimate))
return(
data.frame(
cbind(
results.cases.pval,
results.cases.estimate,
results.control.pval,
results.control.estimate
),
row.names = NULL,
stringsAsFactors = FALSE
))
}, error = function(e) {
print(row)
return()
})
},
.id = NULL,
.progress = "time",
.parallel = TRUE,
.inform = TRUE
)
readr::write_csv(tab,path = "results/results_regions_lm_250kb_window.csv")
tab <- readr::read_csv("results/results_regions_lm_250kb_window.csv", col_types = readr::cols())
tab$fdr.controls <- p.adjust(tab$control_pval_met.residual,method = "fdr")
tab$fdr.cases <- p.adjust(tab$cases_pval_met.residual,method = "fdr")
output <- tab[,c("regionID","gene_external_gene_name",
"cases_estimate_met.residual","cases_pval_met.residual","fdr.cases",
"control_estimate_met.residual","control_pval_met.residual","fdr.controls")]
colnames(output) <- c("coMethDMR",
"geneSymbol",
"estimate.cases",
"pval.cases",
"estimate.controls",
"pval.controls"
)
cols <- c(grep("ROSMAP_coMethRegion",colnames(region.analysis)),
grep("Relation",colnames(region.analysis)):grep("^smoke_bi$",colnames(region.analysis)))
output2 <- merge(output,
region.analysis[,cols],
by.x = "coMethDMR",
by.y = "ROSMAP_coMethRegion",
all.x = TRUE)
write.csv(output2,file = "results/results_regions_lm_250kb_window_renamed.csv")
single.cpg.analysis <- readr::read_csv(
"../code_validation/Meta_analysis_code/meta_analysis_single_cpg_results/meta_analysis_single_cpg_sig_no_crossHyb_smoking_df.csv",
col_types = readr::cols()
)
dmc <- single.cpg.analysis %>% filter(!is.na(ROSMAP_pValue))
dim(dmc)
## [1] 3699 32
dmc.hypo <- dmc %>% filter(estimate < 0)
dim(dmc.hypo)
## [1] 1542 32
dmc.hyper <- dmc %>% filter(estimate > 0)
dim(dmc.hyper)
## [1] 2157 32
For each CpG significantly associated with Braak stage, we looked for genes within \(250Kbp\), removed confounding effects in gene expression and DNA methylation levels, and then correlated residual methylation with gene expression levels with the following model, for cases and controls separately:
\[rna_{residual} \sim met_{residual} \]
\(Median(\text{DNAm m-values in DMR}) \sim celltype.proportion + batch + \text{sample plate} + ageAtDeath + sex => \text{DNAm residuals}\)
# 1) remove confounding effects in DNAm data:
resid_met_cpg <- GetResiduals(
dnam = matched.dnam[dmc$cpg,],
betaToM = TRUE, #converts to Mvalues for fitting linear model
pheno_df = matched.phenotype,
covariates_char = c("Sample_Plate", "prop.neuron", "batch","msex","age_death"),
nCores_int = 1,
progressbar = TRUE
)
save(resid_exp,
resid_met_cpg,
file = "data/residuals_cpg.rda")
The function get_region_target_gene
will extend the regions \(+-250Kbp\) and return the overlapping genes.
dmc.gr <- sesameData::sesameDataGet("HM450.hg19.manifest")[dmc$cpg,]
regions.genes <- get_region_target_gene(
regions.gr = dmc.gr,
genome = "hg19",
method = "window",
window.width = 500 * 10 ^ 3) # 500 kb
regions.genes <- regions.genes %>%
dplyr::filter(regions.genes$gene_ensembl_gene_id %in% rownames(resid_exp))
regions.genes$cpg <- names(dmc.gr)[
match(
regions.genes$regionID,paste0(as.data.frame(dmc.gr)$seqnames,
":",
as.data.frame(dmc.gr)$start,"-",as.data.frame(dmc.gr)$end)
)
]
dim(regions.genes)
## [1] 29186 12
head(regions.genes)
Linear models: - For cases (Braak stage 3-6): \(\text{RNA residuals} \sim \text{DNAm residuals}\) - For controls (Braak stage 0-2): \(\text{RNA residuals} \sim \text{DNAm residuals}\)
# http://www.r-tutor.com/elementary-statistics/simple-linear-regression/residual-plot
tab.cpg <- plyr::adply(
regions.genes,
.margins = 1,
.fun = function(row) {
tryCatch({
rna.target <-
resid_exp[rownames(resid_exp) == row$gene_ensembl_gene_id, , drop = FALSE]
met.residual <-
resid_met_cpg[rownames(resid_met_cpg) == as.character(row$cpg), ]
df <-
data.frame(
rna.residual = rna.target %>% as.numeric,
met.residual = met.residual %>% as.numeric,
Braak_stage = matched.phenotype$braaksc,
group = ifelse(matched.phenotype$braaksc < 3, "Control", "Case")
)
# 2) fit linear model to cases and controls seperately:
# For cases (Braak stage 3-6)
# RNA_residuals ~ DNAm_residuals
results.cases <-
lm (
rna.residual ~ met.residual,
data = df[df$Braak_stage > 2, ]
)
results.cases.pval <-
summary(results.cases)$coefficients[-1, 4, drop = F] %>% t %>% as.data.frame()
results.cases.estimate <-
summary(results.cases)$coefficients[-1, 1, drop = F] %>% t %>% as.data.frame()
colnames(results.cases.pval) <-
paste0("cases_pval_", colnames(results.cases.pval))
colnames(results.cases.estimate) <-
paste0("cases_estimate_", colnames(results.cases.estimate))
# For controls (Braak stage 0 -2)
# RNA_residuals ~ DNAm_residuals
results.control <-
lm (
rna.residual ~ met.residual,
data = df[df$Braak_stage < 3, ]
)
results.control.pval <-
summary(results.control)$coefficients[-1, 4, drop = F] %>% t %>% as.data.frame()
results.control.estimate <-
summary(results.control)$coefficients[-1, 1, drop = F] %>% t %>% as.data.frame()
colnames(results.control.pval) <-
paste0("control_pval_", colnames(results.control.pval))
colnames(results.control.estimate) <-
paste0("control_estimate_", colnames(results.control.estimate))
return(
data.frame(
cbind(
results.cases.pval,
results.cases.estimate,
results.control.pval,
results.control.estimate
),
row.names = NULL,
stringsAsFactors = FALSE
))
}, error = function(e) {
print(row)
return()
})
},
.id = NULL,
.progress = "time",
.parallel = TRUE,
.inform = TRUE
)
readr::write_csv(tab.cpg,path = "results/results_single_cpg_lm_250kb_window.csv")
tab.cpg <- readr::read_csv("results/results_single_cpg_lm_250kb_window.csv", col_types = readr::cols())
tab.cpg$fdr.controls <- p.adjust(tab.cpg$control_pval_met.residual,method = "fdr")
tab.cpg$fdr.cases <- p.adjust(tab.cpg$cases_pval_met.residual,method = "fdr")
output <- tab.cpg[,c("cpg","gene_external_gene_name",
"cases_estimate_met.residual","cases_pval_met.residual","fdr.cases",
"control_estimate_met.residual","control_pval_met.residual","fdr.controls")]
colnames(output) <- c("cpg",
"geneSymbol",
"estimate.cases",
"pval.cases",
"estimate.controls",
"pval.controls"
)
write.csv(output,file = "results/results_single_cpg_lm_250kb_window_renamed.csv")
dmr <- read.csv(
"results/results_regions_lm_250kb_window_renamed.csv"
)
pathDropbox <- file.path(dir("~", pattern = "Dropbox", full.names = TRUE))
dmr_meta <- read.csv(
file.path(pathDropbox,
"coMethDMR_metaAnalysis/",
"code_validation/Meta_analysis_code/meta_analysis_region_results",
"/step4_dmr_vs_cpgs/meta_analysis_sig_no_crossHyb_smoking_ov_comb_p_with_all.csv")
)[, c("ROSMAP_coMethRegion",
"GREAT_annotation",
"UCSC_RefGene_Group",
"UCSC_RefGene_Accession",
"UCSC_RefGene_Name",
"state")
]
dmr.annot <- merge(
dmr, dmr_meta,
by.x = "coMethDMR",
by.y = "ROSMAP_coMethRegion",
sort = FALSE
)
dmr.annot <- dmr.annot[, c(1,3:9,21:24,10,25,11:18)]
write.csv(
dmr.annot,
"results_regions_lm_250kb_window_renamed_with_annot.csv"
)
dmr_case <- dmr.annot %>%
group_by(coMethDMR) %>%
filter(pval.cases == min(pval.cases)) %>%
as.data.frame()
dmr_case$fdr.cases.adjusted <- p.adjust(
dmr_case$pval.cases, method = "fdr"
)
dmr_case <- dmr_case[,c(1:4, 23,9:22)]
dmr_case
write.csv(
dmr_case,
"results_regions_lm_250kb_window_renamed_mostSigCases.csv",
row.names = FALSE
)
dmr_control <- dmr.annot %>%
group_by(coMethDMR) %>%
filter(pval.controls == min(pval.controls)) %>%
as.data.frame()
dmr_control$fdr.controls.adjusted <- p.adjust(
dmr_control$pval.controls, method = "fdr"
)
dmr_control <- dmr_control[,c(1:2, 6:7, 23,9:22)]
dmr_control
write.csv(
dmr_control,
"results_regions_lm_250kb_window_renamed_mostSigControls.csv",
row.names = FALSE
)
cpg <- read.csv(
"results/results_single_cpg_lm_250kb_window_renamed.csv"
)
cpg_meta <- read.csv(
file.path(pathDropbox,
"coMethDMR_metaAnalysis/",
"code_validation/Meta_analysis_code/meta_analysis_single_cpg_results/",
"/meta_analysis_single_cpg_sig_no_crossHyb_smoking_with_state_greatAnnot_df.csv")
)[, c("cpg",
"GREAT_annotation",
"UCSC_RefGene_Group",
"UCSC_RefGene_Accession",
"UCSC_RefGene_Name",
"Relation_to_Island",
"state",
"estimate",
"se",
"pVal.fixed",
"pVal.random",
"pValQ",
"direction",
"pVal.final",
"fdr")
]
cpg.annot <- merge(
cpg, cpg_meta,
by = "cpg",
sort = FALSE
) %>%
select(-X)
cpg.annot
write.csv(
cpg.annot,
"results_single_cpg_lm_250kb_window_renamed_with_annot.csv"
)
cpg_case <- cpg.annot %>%
group_by(cpg) %>%
filter(pval.cases == min(pval.cases)) %>%
as.data.frame()
cpg_case$fdr.cases.adjusted <- p.adjust(
cpg_case$pval.cases, method = "fdr"
)
cpg_case <- cpg_case[, c(1:4, 25, 15,17:24)]
cpg_case
write.csv(
cpg_case,
"results_single_cpg_lm_250kb_window_renamed_mostSigCases.csv",
row.names = FALSE
)
cpg_control <- cpg.annot %>%
group_by(cpg) %>%
filter(pval.controls == min(pval.controls)) %>%
distinct(cpg, .keep_all = TRUE) %>%
as.data.frame()
cpg_control$fdr.controls.adjusted <- p.adjust(
cpg_control$pval.controls, method = "fdr"
)
cpg_control <- cpg_control[, c(1:2,6:7, 25, 15, 17:24)]
cpg_control
write.csv(
cpg_control,
"results_single_cpg_lm_250kb_window_renamed_mostSigControls.csv",
row.names = FALSE
)
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R Under development (unstable) (2020-02-25 r77857)
## os macOS Catalina 10.15.3
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-04-26
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## ! package * version date lib
## acepack 1.4.1 2016-10-29 [1]
## annotate 1.65.1 2020-01-27 [1]
## AnnotationDbi 1.49.1 2020-01-25 [1]
## AnnotationFilter 1.11.0 2019-11-06 [1]
## AnnotationHub * 2.19.11 2020-04-16 [1]
## aroma.light 3.17.1 2019-12-09 [1]
## askpass 1.1 2019-01-13 [1]
## assertthat 0.2.1 2019-03-21 [1]
## backports 1.1.6 2020-04-05 [1]
## base64 2.0 2016-05-10 [1]
## base64enc 0.1-3 2015-07-28 [1]
## beanplot 1.2 2014-09-19 [1]
## Biobase * 2.47.3 2020-03-16 [1]
## BiocFileCache * 1.11.6 2020-04-16 [1]
## BiocGenerics * 0.33.3 2020-03-23 [1]
## BiocManager 1.30.10 2019-11-16 [1]
## BiocParallel 1.21.2 2019-12-21 [1]
## BiocVersion 3.11.1 2019-11-13 [1]
## biomaRt 2.43.5 2020-04-02 [1]
## Biostrings 2.55.7 2020-03-24 [1]
## biovizBase 1.35.1 2019-12-03 [1]
## bit 1.1-15.2 2020-02-10 [1]
## bit64 0.9-7 2017-05-08 [1]
## bitops 1.0-6 2013-08-17 [1]
## blob 1.2.1 2020-01-20 [1]
## boot 1.3-24 2019-12-20 [1]
## broom 0.5.6 2020-04-20 [1]
## BSgenome 1.55.4 2020-03-19 [1]
## bumphunter 1.29.0 2019-11-07 [1]
## callr 3.4.3 2020-03-28 [1]
## checkmate 2.0.0 2020-02-06 [1]
## circlize 0.4.8 2019-09-08 [1]
## cli 2.0.2 2020-02-28 [1]
## clue 0.3-57 2019-02-25 [1]
## cluster 2.1.0 2019-06-19 [1]
## codetools 0.2-16 2018-12-24 [1]
## colorspace 1.4-1 2019-03-18 [1]
## P coMethDMR * 0.0.0.9001 2020-03-24 [?]
## ComplexHeatmap * 2.3.4 2020-04-02 [1]
## crayon 1.3.4 2017-09-16 [1]
## curl 4.3 2019-12-02 [1]
## data.table 1.12.9 2020-02-26 [1]
## DBI 1.1.0 2019-12-15 [1]
## dbplyr * 1.4.3 2020-04-19 [1]
## DelayedArray * 0.13.12 2020-04-10 [1]
## DelayedMatrixStats * 1.9.1 2020-03-30 [1]
## desc 1.2.0 2018-05-01 [1]
## DESeq 1.39.0 2019-11-06 [1]
## devtools 2.3.0 2020-04-10 [1]
## dichromat 2.0-0 2013-01-24 [1]
## digest 0.6.25 2020-02-23 [1]
## doParallel * 1.0.15 2019-08-02 [1]
## doRNG 1.8.2 2020-01-27 [1]
## downloader 0.4 2015-07-09 [1]
## dplyr * 0.8.99.9002 2020-04-02 [1]
## EDASeq 2.21.2 2020-03-20 [1]
## edgeR 3.29.1 2020-02-26 [1]
## ellipsis 0.3.0 2019-09-20 [1]
## ELMER 2.11.1 2020-04-20 [1]
## ELMER.data 2.11.0 2019-10-31 [1]
## ensembldb 2.11.4 2020-04-17 [1]
## evaluate 0.14 2019-05-28 [1]
## ExperimentHub * 1.13.7 2020-04-16 [1]
## fansi 0.4.1 2020-01-08 [1]
## fastmap 1.0.1 2019-10-08 [1]
## foreach * 1.5.0 2020-03-30 [1]
## foreign 0.8-78 2020-04-13 [1]
## Formula 1.2-3 2018-05-03 [1]
## fs 1.4.1 2020-04-04 [1]
## genefilter 1.69.0 2019-11-06 [1]
## geneplotter 1.65.0 2019-11-06 [1]
## generics 0.0.2 2018-11-29 [1]
## GenomeInfoDb * 1.23.17 2020-04-13 [1]
## GenomeInfoDbData 1.2.3 2020-04-20 [1]
## GenomicAlignments 1.23.2 2020-03-24 [1]
## GenomicFeatures 1.39.7 2020-03-19 [1]
## GenomicRanges * 1.39.3 2020-04-08 [1]
## GEOquery 2.55.1 2019-11-18 [1]
## GetoptLong 0.1.8 2020-01-08 [1]
## ggplot2 * 3.3.0 2020-03-05 [1]
## ggpubr * 0.2.5 2020-02-13 [1]
## ggrepel 0.8.2 2020-03-08 [1]
## ggsignif 0.6.0 2019-08-08 [1]
## ggthemes 4.2.0 2019-05-13 [1]
## GlobalOptions 0.1.1 2019-09-30 [1]
## glue 1.4.0 2020-04-03 [1]
## gridExtra 2.3 2017-09-09 [1]
## gtable 0.3.0 2019-03-25 [1]
## Gviz 1.31.12 2020-03-05 [1]
## HDF5Array 1.15.18 2020-04-10 [1]
## Hmisc 4.4-0 2020-03-23 [1]
## hms 0.5.3 2020-01-08 [1]
## htmlTable 1.13.3 2019-12-04 [1]
## htmltools 0.4.0 2019-10-04 [1]
## htmlwidgets 1.5.1 2019-10-08 [1]
## httpuv 1.5.2 2019-09-11 [1]
## httr 1.4.1 2019-08-05 [1]
## hwriter 1.3.2 2014-09-10 [1]
## IlluminaHumanMethylation450kanno.ilmn12.hg19 0.6.0 2020-03-24 [1]
## IlluminaHumanMethylationEPICanno.ilm10b2.hg19 0.6.0 2020-03-24 [1]
## illuminaio 0.29.0 2019-11-06 [1]
## interactiveDisplayBase 1.25.0 2019-11-06 [1]
## IRanges * 2.21.8 2020-03-25 [1]
## iterators * 1.0.12 2019-07-26 [1]
## jpeg 0.1-8.1 2019-10-24 [1]
## jsonlite 1.6.1 2020-02-02 [1]
## km.ci 0.5-2 2009-08-30 [1]
## KMsurv 0.1-5 2012-12-03 [1]
## knitr 1.28 2020-02-06 [1]
## later 1.0.0 2019-10-04 [1]
## lattice 0.20-41 2020-04-02 [1]
## latticeExtra 0.6-29 2019-12-19 [1]
## lazyeval 0.2.2 2019-03-15 [1]
## lifecycle 0.2.0 2020-03-06 [1]
## limma 3.43.8 2020-04-14 [1]
## lme4 1.1-23 2020-04-07 [1]
## lmerTest 3.1-2 2020-04-08 [1]
## locfit 1.5-9.4 2020-03-25 [1]
## magrittr * 1.5 2014-11-22 [1]
## MASS 7.3-51.5 2019-12-20 [1]
## Matrix 1.2-18 2019-11-27 [1]
## matrixStats * 0.56.0 2020-03-13 [1]
## mclust 5.4.6 2020-04-11 [1]
## memoise 1.1.0 2017-04-21 [1]
## P methTF * 0.1.0 2020-03-24 [?]
## mgcv 1.8-31 2019-11-09 [1]
## mime 0.9 2020-02-04 [1]
## minfi 1.33.1 2020-03-05 [1]
## minqa 1.2.4 2014-10-09 [1]
## MultiAssayExperiment 1.13.21 2020-04-13 [1]
## multtest 2.43.1 2020-03-12 [1]
## munsell 0.5.0 2018-06-12 [1]
## nlme 3.1-147 2020-04-13 [1]
## nloptr 1.2.2.1 2020-03-11 [1]
## nnet 7.3-13 2020-02-25 [1]
## nor1mix 1.3-0 2019-06-13 [1]
## numDeriv 2016.8-1.1 2019-06-06 [1]
## openssl 1.4.1 2019-07-18 [1]
## parsetools 0.1.3 2020-04-08 [1]
## pillar 1.4.3 2019-12-20 [1]
## pkgbuild 1.0.6 2019-10-09 [1]
## pkgcond 0.1.0 2018-12-03 [1]
## pkgconfig 2.0.3 2019-09-22 [1]
## pkgload 1.0.2 2018-10-29 [1]
## plotly 4.9.2.1 2020-04-04 [1]
## plyr 1.8.6 2020-03-03 [1]
## png 0.1-7 2013-12-03 [1]
## postlogic 0.1.0.1 2019-12-18 [1]
## preprocessCore 1.49.2 2020-02-01 [1]
## prettyunits 1.1.1 2020-01-24 [1]
## processx 3.4.2 2020-02-09 [1]
## progress 1.2.2 2019-05-16 [1]
## promises 1.1.0 2019-10-04 [1]
## ProtGenerics 1.19.3 2019-12-25 [1]
## ps 1.3.2 2020-02-13 [1]
## purrr 0.3.4 2020-04-17 [1]
## purrrogress 0.1.1 2019-07-22 [1]
## quadprog 1.5-8 2019-11-20 [1]
## R.methodsS3 1.8.0 2020-02-14 [1]
## R.oo 1.23.0 2019-11-03 [1]
## R.utils 2.9.2 2019-12-08 [1]
## R6 2.4.1 2019-11-12 [1]
## rappdirs 0.3.1 2016-03-28 [1]
## RColorBrewer 1.1-2 2014-12-07 [1]
## Rcpp 1.0.4.6 2020-04-09 [1]
## RCurl 1.98-1.2 2020-04-18 [1]
## readr 1.3.1 2018-12-21 [1]
## remotes 2.1.1 2020-02-15 [1]
## reshape 0.8.8 2018-10-23 [1]
## reshape2 1.4.4 2020-04-09 [1]
## rhdf5 2.31.10 2020-04-02 [1]
## Rhdf5lib 1.9.3 2020-04-15 [1]
## rjson 0.2.20 2018-06-08 [1]
## rlang 0.4.5.9000 2020-03-20 [1]
## rmarkdown 2.1 2020-01-20 [1]
## rngtools 1.5 2020-01-23 [1]
## rpart 4.1-15 2019-04-12 [1]
## rprojroot 1.3-2 2018-01-03 [1]
## Rsamtools 2.3.7 2020-03-18 [1]
## RSQLite 2.2.0 2020-01-07 [1]
## rstudioapi 0.11 2020-02-07 [1]
## rtracklayer 1.47.0 2019-11-06 [1]
## rvest 0.3.5 2019-11-08 [1]
## S4Vectors * 0.25.15 2020-04-04 [1]
## scales 1.1.0 2019-11-18 [1]
## scrime 1.3.5 2018-12-01 [1]
## selectr 0.4-2 2019-11-20 [1]
## sesameData * 1.5.0 2019-10-31 [1]
## sessioninfo 1.1.1 2018-11-05 [1]
## shape 1.4.4 2018-02-07 [1]
## shiny 1.4.0.2 2020-03-13 [1]
## ShortRead 1.45.4 2020-03-18 [1]
## siggenes 1.61.0 2019-11-06 [1]
## statmod 1.4.34 2020-02-17 [1]
## stringi 1.4.6 2020-02-17 [1]
## stringr 1.4.0 2019-02-10 [1]
## SummarizedExperiment * 1.17.5 2020-03-27 [1]
## survival 3.1-12 2020-04-10 [1]
## survminer 0.4.6 2019-09-03 [1]
## survMisc 0.5.5 2018-07-05 [1]
## sva 3.35.2 2020-03-22 [1]
## TCGAbiolinks * 2.15.3 2019-12-17 [1]
## testextra 0.1.0.1 2019-12-18 [1]
## testthat * 2.3.2 2020-03-02 [1]
## tibble 3.0.1 2020-04-20 [1]
## tidyr * 1.0.2 2020-01-24 [1]
## tidyselect 1.0.0 2020-01-27 [1]
## usethis 1.6.0 2020-04-09 [1]
## VariantAnnotation 1.33.4 2020-04-09 [1]
## vctrs 0.2.99.9010 2020-04-02 [1]
## viridisLite 0.3.0 2018-02-01 [1]
## withr 2.1.2 2018-03-15 [1]
## xfun 0.13 2020-04-13 [1]
## XML 3.99-0.3 2020-01-20 [1]
## xml2 1.3.1 2020-04-09 [1]
## xtable 1.8-4 2019-04-21 [1]
## XVector 0.27.2 2020-03-24 [1]
## yaml 2.2.1 2020-02-01 [1]
## zlibbioc 1.33.1 2020-01-24 [1]
## zoo 1.8-7 2020-01-10 [1]
## source
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## local
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Github (tidyverse/dplyr@affb977)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Github (Bioconductor/GenomicRanges@70e6e69)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## local
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Github (r-lib/rlang@a90b04b)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Github (r-lib/vctrs@fd24927)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
##
## [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library
##
## P ── Loaded and on-disk path mismatch.