Get probes from regions
### Call in datasets
meta_sig <- read.csv(dir(dir.result.comp,pattern = "no.*csv",full.names = TRUE))
meta_all <- read.csv(
paste0(dir.result.meta.analysis, "meta_analysis_ALL_df.csv")
) #dim: 40010 41
probes.cluster.all <- coMethDMR::getPredefinedCluster(
arrayType = "450k",
clusterType = "regions"
)
### get all cps in meta_sig input regions
idx <- gsub(
"450k_Gene_3_200.|450k_InterGene_3_200.","",names(probes.cluster.all)
) %in% meta_sig$inputRegion
meta_sig_probes <- probes.cluster.all[idx] %>%
unlist %>%
as.character() %>%
unique
length(meta_sig_probes)
## [1] 742
### get all cps in meta_all input regions
idx <- gsub(
"450k_Gene_3_200.|450k_InterGene_3_200.","",names(probes.cluster.all)
) %in% meta_all$inputRegion
meta_all_probes <- probes.cluster.all[idx] %>%
unlist %>%
as.character() %>%
unique
length(meta_all_probes)
## [1] 203198
all.foreground.probes <- c(meta_sig_probes, foreground.probes) %>% unique
all.background.probes <- c(meta_all_probes, background.probes) %>% unique
Pathway analysis (gene ontology analysis)
library(missMethyl)
library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
### collection = "GO"
all_go <- gometh(
sig.cpg = all.foreground.probes,
all.cpg = all.background.probes,
collection = "GO",
fract.counts = TRUE
)
# topGSA(all_go)
go <- missMethyl:::.getGO()
out <- getMappedEntrezIDs(
sig.cpg = foreground.probes,
all.cpg = background.probes,
array.type = "450K"
)
sorted.eg.sig <- out$sig.eg
gene.info <- TCGAbiolinks::get.GRCh.bioMart(genome = "hg19")
all_go$de_genes <- plyr::aaply(rownames(all_go),1,.fun = function(idx){
gene.info$external_gene_name[gene.info$entrezgene_id %in% intersect(go$idList[[idx]],sorted.eg.sig)] %>%
sort %>% unique %>% paste(collapse = ",")
})
all_go_ordered <- all_go[
order(all_go$P.DE),
]
write.csv(
all_go_ordered,
paste0(dir.result.pathway, "pathway_regions_and_cpgs_GO_results_all.csv"),
row.names = TRUE
)
all_go_ordered.bp <- all_go_ordered %>%
tibble::rownames_to_column("GO") %>% # keep row names
dplyr::filter(ONTOLOGY == "BP") %>%
tibble::column_to_rownames("GO") # keep row names
all_go_ordered.bp$FDR <- p.adjust(all_go_ordered.bp$P.DE,"fdr")
write.csv(
all_go_ordered.bp,
paste0(dir.result.pathway, "pathway_regions_and_cpgs_GO_results_BP_N_range_5_200_fdr_recalc.csv"),
row.names = TRUE
)
all_go_ordered.bp %>%
dplyr::filter(FDR < 0.05) %>%
DT::datatable(filter = 'top',
style = "bootstrap",
extensions = 'Buttons',
options = list(scrollX = TRUE,
dom = 'Bfrtip',
buttons = I('colvis'),
keys = TRUE,
pageLength = 10),
rownames = FALSE,
caption = "GO results BP (FDR < 0.05)")
### collection = "KEGG"
all_kegg <- gometh(
sig.cpg = all.foreground.probes,
all.cpg = all.background.probes,
collection = "KEGG",
fract.counts = TRUE
)
# topGSA(all_kegg)
kegg <- missMethyl:::.getKEGG()
out <- getMappedEntrezIDs(
sig.cpg = foreground.probes,
all.cpg = background.probes,
array.type = "450K"
)
sorted.eg.sig <- out$sig.eg
gene.info <- TCGAbiolinks::get.GRCh.bioMart(genome = "hg19")
all_kegg$de_genes <- plyr::aaply(rownames(all_kegg),1,.fun = function(idx){
gene.info$external_gene_name[gene.info$entrezgene_id %in% intersect(kegg$idList[[idx]],sorted.eg.sig)] %>%
sort %>% unique %>% paste(collapse = ",")
})
all_kegg_ordered <- all_kegg[
order(all_kegg$P.DE),
]
write.csv(
all_kegg_ordered,
paste0(dir.result.pathway, "pathway_regions_and_cpgs_KEGG_results_all.csv"),
row.names = TRUE
)
all_kegg_ordered <- all_kegg_ordered %>%
tibble::rownames_to_column("KEGG") %>% # keep row names
tibble::column_to_rownames("KEGG") # keep row names
all_kegg_ordered$FDR <- p.adjust(all_kegg_ordered$P.DE,"fdr")
write.csv(
all_kegg_ordered,
paste0(dir.result.pathway, "pathway_regions_and_cpgs_KEGG_results_N_range_5_200_fdr_recalc.csv"),
row.names = TRUE
)
all_kegg_ordered %>%
dplyr::filter(P.DE < 0.05) %>%
DT::datatable(filter = 'top',
style = "bootstrap",
extensions = 'Buttons',
options = list(scrollX = TRUE,
dom = 'Bfrtip',
buttons = I('colvis'),
keys = TRUE,
pageLength = 10),
rownames = FALSE,
caption = "kegg results (P.DE < 0.05)")