This report contains a correlation analysis between DNA methylation levels of probes and their annotated gene nearest genes. The main purpose is to compare the differences in the DNA methylation metadata in GDC legacy database vs GDC harmonized data.
A pair of probe and genes are consider:
As the purpose is to compare DNA methylation annotation, the correlation of both analyses (hg19 and hg38) used the same gene expression data (GDC harmonized - FPKM-UQ).
This sections compared hg38 associations and h19 associations. The hg38 annotation was retrieved (same used in GDC) from http://zwdzwd.github.io/InfiniumAnnotation and only genes & probes with distance between 0 and -1500 (negative distance) are kept. The hg19 annotation was retrieved the ilumina manifest. We kept only TSS200 and TSS1500 associations.
# The code to create the object (associated.genes_id and associated.genes_id.hg19) is below
associated.genes_id <- associated.genes_id[as.numeric(as.character(associated.genes_id$distance)) < 0,]
associated.genes_id
associated.genes_id$distance <- NULL # ignore different distances
associated.genes_id <- unique(associated.genes_id)
associated.genes_id
# hg19: Illumina manifest. Max distance TSS 1500
unique(associated.genes_id.hg19[,c("probe", "ensembl_gene_id","group_type")])
hg19.pairs <- unique(associated.genes_id.hg19[,c("probe", "ensembl_gene_id")])
colnames(hg19.pairs)[1] <- "probe"
hg38.pairs <- unique(associated.genes_id[,c("probe","ensembl_gene_id")])
shared.pairs <- inner_join(as_tibble(hg19.pairs),as_tibble(hg38.pairs))
unique.hg19.pairs <- anti_join(as_tibble(hg19.pairs),as_tibble(hg38.pairs))
unique.hg38.pairs <- anti_join(as_tibble(hg38.pairs),as_tibble(hg19.pairs))
unique.hg19.genes <- setdiff(unique(hg19.pairs$ensembl_gene_id),unique(hg38.pairs$ensembl_gene_id))
unique.hg38.genes <- setdiff(unique(hg38.pairs$ensembl_gene_id),unique(hg19.pairs$ensembl_gene_id))
shared.genes <- intersect(unique(hg38.pairs$ensembl_gene_id),unique(hg19.pairs$ensembl_gene_id))
library(VennDiagram)
draw.pairwise.venn(length(unique(associated.genes_id$probe)),
length(unique(associated.genes_id.hg19$probe)),
length(intersect(unique(associated.genes_id$probe),
unique(associated.genes_id.hg19$probe))),
category = c("Probes hg38", "Probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.11], polygon[GRID.polygon.12], polygon[GRID.polygon.13], polygon[GRID.polygon.14], text[GRID.text.15], text[GRID.text.16], lines[GRID.lines.17], text[GRID.text.18], text[GRID.text.19], text[GRID.text.20])
draw.pairwise.venn(length(unique(associated.genes_id$ensembl_gene_id)),
length(unique(associated.genes_id.hg19$ensembl_gene_id)),
length(intersect(unique(associated.genes_id$ensembl_gene_id),
unique(associated.genes_id.hg19$ensembl_gene_id))),
category = c("Genes associated to probes hg38", "Genes associated to probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.21], polygon[GRID.polygon.22], polygon[GRID.polygon.23], polygon[GRID.polygon.24], text[GRID.text.25], text[GRID.text.26], lines[GRID.lines.27], text[GRID.text.28], text[GRID.text.29], text[GRID.text.30])
g1 <- unique(associated.genes_id[,c("probe","ensembl_gene_id")])
g2 <- unique(associated.genes_id.hg19[,c("probe","ensembl_gene_id")])
draw.pairwise.venn(nrow(g1),
nrow(g2),
sum(paste0(g1$probe,"_",g1$ensembl_gene_id) %in% paste0(g2$probe,"_",g2$ensembl_gene_id)),
category = c("Pairs associated to probes hg38", "Pairs associated to probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.31], polygon[GRID.polygon.32], polygon[GRID.polygon.33], polygon[GRID.polygon.34], text[GRID.text.35], text[GRID.text.36], lines[GRID.lines.37], text[GRID.text.38], text[GRID.text.39], text[GRID.text.40])
library(plotly)
# Each correlation file has pairs of gene probes (max distance 1500bp)
# correlation values (spearman pvalue, rho and status)
# we read it and summarize
correlation.status <- NULL
files <- dir(pattern = "correlation_final.rda", recursive = T, full.names = T)
for(f in files){
load(f)
# to keep only the same distance signal
hg38_correlation <- left_join(as_tibble(unique(associated.genes_id[,c("probe", "ensembl_gene_id")])),
as_tibble(hg38_correlation))
tab <- plyr::count(hg19_correlation$status)
hg19.col <- gsub("\\.\\/","",gsub("_correlation_final.rda","_hg19",f))
colnames(tab) <- c("Status",hg19.col)
if(is.null(correlation.status)) {
correlation.status <- tab
} else {
correlation.status <- full_join(correlation.status, tab)
}
tab <- plyr::count(hg38_correlation$status)
hg38.col <- gsub("\\.\\/","",gsub("_correlation_final.rda","_hg38",f))
colnames(tab) <- c("Status",hg38.col)
correlation.status <- full_join(correlation.status, tab)
}
correlation.status[is.na(correlation.status)] <- 0
# Each correlation file has pairs of gene probes (max distance 1500bp)
# correlation values (spearman pvalue, rho and status)
# we read it and select only Strongly negatively correlated (SNC)
SNC.type <- NULL
phantom <- NULL
files <- dir(pattern = "correlation_final.rda", recursive = T, full.names = T)
for(f in files){
load(f)
# to keep only the same distance signal
hg38_correlation <- left_join(as_tibble(unique(associated.genes_id[,c("probe", "ensembl_gene_id")])),
as_tibble(hg38_correlation))
hg19 <- hg19_correlation[hg19_correlation$status == "Strongly negatively correlated (SNC)",]
paired.genes <- unique(hg19$ensembl_gene_id)
tab <- plyr::count(hg38_correlation[match(paired.genes, hg38_correlation$ensembl_gene_id),]$group_type)
phantom.hg19 <- c(sum(table(hg19[!duplicated(hg19$probe),]$Phantom)),sum(is.na(hg19[!duplicated(hg19$probe),]$Phantom)))
hg19.col <- gsub("\\.\\/","", gsub("_correlation_final.rda","_hg19",f))
colnames(tab) <- c("Gene_group_type", hg19.col)
if(is.null(SNC.type)) {
SNC.type <- tab
} else {
SNC.type <- full_join(SNC.type, tab)
}
aux <- hg38_correlation[hg38_correlation$status == "Strongly negatively correlated (SNC)",]
z <- hg19_correlation[hg19_correlation$probe %in% aux$probe,]
z <- z[!duplicated(z$probe),]
phantom.hg38 <- c(sum(table(z$Phantom)),
sum(is.na(z$Phantom)))
phantom <- rbind(phantom,phantom.hg19,phantom.hg38)
tab <- plyr::count(aux[!duplicated(aux$ensembl_gene_id),]$group_type)
hg38.col <- gsub("\\.\\/", "", gsub("_correlation_final.rda","_hg38",f))
colnames(tab) <- c("Gene_group_type", hg38.col)
SNC.type <- full_join(SNC.type, tab)
}
colnames(phantom) <- c("Phantom","No phantom")
rownames(phantom) <- sapply(files, function(x) return(c(paste0("hg38",x),paste0("hg19",x))))
# Each correlation file has pairs of gene probes (max distance 1500bp)
# correlation values (spearman pvalue, rho and status)
# we read it and select only Weakly negatively correlated (WNC)
WNC.type <- NULL
files <- dir(pattern = "correlation_final.rda", recursive = T, full.names = T)
for(f in files){
load(f)
# to keep only the same distance signal
hg38_correlation <- left_join(as_tibble(unique(associated.genes_id[,c("probe", "ensembl_gene_id")])),
as_tibble(hg38_correlation))
paired.genes <- unique(hg19_correlation[hg19_correlation$status == "Weakly negatively correlated (WNC)",]$ensembl_gene_id)
tab <- plyr::count(hg38_correlation[match(paired.genes, hg38_correlation$ensembl_gene_id),]$group_type)
hg19.col <- gsub("\\.\\/","", gsub("_correlation_final.rda","_hg19",f))
colnames(tab) <- c("Gene_group_type", hg19.col)
if(is.null(WNC.type)) {
WNC.type <- tab
} else {
WNC.type <- full_join(WNC.type, tab)
}
aux <- hg38_correlation[hg38_correlation$status == "Weakly negatively correlated (WNC)",]
tab <- plyr::count(aux[!duplicated(aux$ensembl_gene_id),]$group_type)
hg38.col <- gsub("\\.\\/", "", gsub("_correlation_final.rda","_hg38",f))
colnames(tab) <- c("Gene_group_type", hg38.col)
WNC.type <- full_join(WNC.type, tab)
}
colnames(correlation.status)[-1] <- paste0(colnames(correlation.status)[-1],"_negative")
colnames(SNC.type)[-1] <- paste0(colnames(SNC.type)[-1],"_negative")
colnames(WNC.type)[-1] <- paste0(colnames(WNC.type)[-1],"_negative")
WNC.type.negative <- WNC.type
SNC.type.negative <- SNC.type
correlation.status.negative <- correlation.status
save(correlation.status.negative,SNC.type.negative,WNC.type.negative,file = "negative_tables.rda")
This sections compared hg38 associations and h19 associations. The hg38 annotation was retrieved (same used in GDC) from http://zwdzwd.github.io/InfiniumAnnotation and only genes & probes with distance between 0 and +1500 (positive distance) are kept. The hg19 annotation was retrieved the ilumina manifest. We kept only TSS200 and TSS1500 associations.
# Source: http://zwdzwd.github.io/InfiniumAnnotation
# Only keeping genes & probes with distance between 0 and 1500
associated.genes_id
associated.genes_id$distance <- NULL # ignore different distances
associated.genes_id <- unique(associated.genes_id)
associated.genes_id
# hg19: Illumina manifest. Max distance TSS 1500
unique(associated.genes_id.hg19[,c("probe", "ensembl_gene_id","group_type")])
hg19.pairs <- unique(associated.genes_id.hg19[,c("probe", "ensembl_gene_id")])
colnames(hg19.pairs)[1] <- "probe"
hg38.pairs <- unique(associated.genes_id[,c("probe","ensembl_gene_id")])
shared.pairs <- inner_join(as_tibble(hg19.pairs),as_tibble(hg38.pairs))
unique.hg19.pairs <- anti_join(as_tibble(hg19.pairs),as_tibble(hg38.pairs))
unique.hg38.pairs <- anti_join(as_tibble(hg38.pairs),as_tibble(hg19.pairs))
unique.hg19.genes <- setdiff(unique(hg19.pairs$ensembl_gene_id),unique(hg38.pairs$ensembl_gene_id))
unique.hg38.genes <- setdiff(unique(hg38.pairs$ensembl_gene_id),unique(hg19.pairs$ensembl_gene_id))
shared.genes <- intersect(unique(hg38.pairs$ensembl_gene_id),unique(hg19.pairs$ensembl_gene_id))
library(VennDiagram)
draw.pairwise.venn(length(unique(associated.genes_id$probe)),
length(unique(associated.genes_id.hg19$probe)),
length(intersect(unique(associated.genes_id$probe),
unique(associated.genes_id.hg19$probe))),
category = c("Probes hg38", "Probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.41], polygon[GRID.polygon.42], polygon[GRID.polygon.43], polygon[GRID.polygon.44], text[GRID.text.45], text[GRID.text.46], text[GRID.text.47], text[GRID.text.48], text[GRID.text.49])
draw.pairwise.venn(length(unique(associated.genes_id$ensembl_gene_id)),
length(unique(associated.genes_id.hg19$ensembl_gene_id)),
length(intersect(unique(associated.genes_id$ensembl_gene_id),
unique(associated.genes_id.hg19$ensembl_gene_id))),
category = c("Genes associated to probes hg38", "Genes associated to probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.50], polygon[GRID.polygon.51], polygon[GRID.polygon.52], polygon[GRID.polygon.53], text[GRID.text.54], text[GRID.text.55], lines[GRID.lines.56], text[GRID.text.57], text[GRID.text.58], text[GRID.text.59])
g1 <- unique(associated.genes_id[,c("probe","ensembl_gene_id")])
g2 <- unique(associated.genes_id.hg19[,c("probe","ensembl_gene_id")])
draw.pairwise.venn(nrow(g1),
nrow(g2),
sum(paste0(g1$probe,"_",g1$ensembl_gene_id) %in% paste0(g2$probe,"_",g2$ensembl_gene_id)),
category = c("Pairs associated to probes hg38", "Pairs associated to probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.60], polygon[GRID.polygon.61], polygon[GRID.polygon.62], polygon[GRID.polygon.63], text[GRID.text.64], text[GRID.text.65], text[GRID.text.66], text[GRID.text.67], text[GRID.text.68])
This sections compared hg38 associations and h19 associations. The hg38 annotation was retrieved (same used in GDC) from http://zwdzwd.github.io/InfiniumAnnotation and only genes & probes with distance between -1500 and +1500 are kept. The hg19 annotation was retrieved the ilumina manifest. We kept only TSS200 and TSS1500 associations.
# Source: http://zwdzwd.github.io/InfiniumAnnotation
# Only keeping genes & probes with distance < 1500
associated.genes_id
associated.genes_id$distance <- NULL # ignore different distances
associated.genes_id <- unique(associated.genes_id)
associated.genes_id
# hg19: Illumina manifest. Max distance TSS 1500
unique(associated.genes_id.hg19[,c("probe", "ensembl_gene_id","group_type")])
hg19.pairs <- unique(associated.genes_id.hg19[,c("probe", "ensembl_gene_id")])
colnames(hg19.pairs)[1] <- "probe"
hg38.pairs <- unique(associated.genes_id[,c("probe","ensembl_gene_id")])
shared.pairs <- inner_join(as_tibble(hg19.pairs),as_tibble(hg38.pairs))
unique.hg19.pairs <- anti_join(as_tibble(hg19.pairs),as_tibble(hg38.pairs))
unique.hg38.pairs <- anti_join(as_tibble(hg38.pairs),as_tibble(hg19.pairs))
unique.hg19.genes <- setdiff(unique(hg19.pairs$ensembl_gene_id),unique(hg38.pairs$ensembl_gene_id))
unique.hg38.genes <- setdiff(unique(hg38.pairs$ensembl_gene_id),unique(hg19.pairs$ensembl_gene_id))
shared.genes <- intersect(unique(hg38.pairs$ensembl_gene_id),unique(hg19.pairs$ensembl_gene_id))
library(VennDiagram)
draw.pairwise.venn(length(unique(associated.genes_id$probe)),
length(unique(associated.genes_id.hg19$probe)),
length(intersect(unique(associated.genes_id$probe),
unique(associated.genes_id.hg19$probe))),
category = c("Probes hg38", "Probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.69], polygon[GRID.polygon.70], polygon[GRID.polygon.71], polygon[GRID.polygon.72], text[GRID.text.73], text[GRID.text.74], lines[GRID.lines.75], text[GRID.text.76], text[GRID.text.77], text[GRID.text.78])
draw.pairwise.venn(length(unique(associated.genes_id$ensembl_gene_id)),
length(unique(associated.genes_id.hg19$ensembl_gene_id)),
length(intersect(unique(associated.genes_id$ensembl_gene_id),
unique(associated.genes_id.hg19$ensembl_gene_id))),
category = c("Genes associated to probes hg38", "Genes associated to probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.79], polygon[GRID.polygon.80], polygon[GRID.polygon.81], polygon[GRID.polygon.82], text[GRID.text.83], text[GRID.text.84], lines[GRID.lines.85], text[GRID.text.86], text[GRID.text.87], text[GRID.text.88])
g1 <- unique(associated.genes_id[,c("probe","ensembl_gene_id")])
g2 <- unique(associated.genes_id.hg19[,c("probe","ensembl_gene_id")])
draw.pairwise.venn(nrow(g1),
nrow(g2),
sum(paste0(g1$probe,"_",g1$ensembl_gene_id) %in% paste0(g2$probe,"_",g2$ensembl_gene_id)),
category = c("Pairs associated to probes hg38", "Pairs associated to probes hg19"),
lty = rep("blank", 2),
fill = c("light blue", "pink"),
alpha = rep(0.5, 2),
cat.pos = c(0, 0),
cat.dist = rep(0.025, 2))
## (polygon[GRID.polygon.89], polygon[GRID.polygon.90], polygon[GRID.polygon.91], polygon[GRID.polygon.92], text[GRID.text.93], text[GRID.text.94], lines[GRID.lines.95], text[GRID.text.96], text[GRID.text.97], text[GRID.text.98])
load(file.path(root,"associated.genes_id_hg19.rda"))
load(file.path(root,"associated.genes_id.rda"))
colnames(associated.genes_id.hg19)[2] <- "probe"
associated.genes_id.negative <- associated.genes_id[as.numeric(as.character(associated.genes_id$distance)) < 0,]
associated.genes_id.negative <- unique(associated.genes_id.negative[,c("group_type","ensembl_gene_id")])
associated.genes_id <- unique(associated.genes_id[,c("group_type","ensembl_gene_id")])
gene.hg19 <- unique(associated.genes_id.hg19$ensembl_gene_id)
associated.genes_id.hg19 <- data.frame(ensembl_gene_id = gene.hg19,
group_type = associated.genes_id$group_type[match(gene.hg19,
associated.genes_id$ensembl_gene_id)]
)
tab.h19 <- plyr::count(associated.genes_id.hg19$group_type)
colnames(tab.h19) <- c("Gene_group_type", "hg19_negative")
tab.h38 <- plyr::count(associated.genes_id.negative$group_type)
colnames(tab.h38) <- c("Gene_group_type", "hg38_negative")
tab.h38.all <- plyr::count(associated.genes_id$group_type)
colnames(tab.h38.all) <- c("Gene_group_type", "hg38_all")
all.type <- NULL
all.type <- full_join(tab.h38, tab.h38.all)
all.type <- full_join(all.type, tab.h19)
library(plotly)
# Each correlation file has pairs of gene probes (max distance 1500bp)
# correlation values (spearman pvalue, rho and status)
# we read it and summarize
files <- dir(pattern = "correlation_final.rda",recursive = T,full.names = T)
correlation.status <- NULL
for(f in files){
load(f)
tab <- plyr::count(hg19_correlation$status)
hg19.col <- gsub("\\.\\/","",gsub("_correlation_final.rda","_hg19",f))
colnames(tab) <- c("Status",hg19.col)
if(is.null(correlation.status)) {
correlation.status <- tab
} else {
correlation.status <- full_join(correlation.status, tab)
}
tab <- plyr::count(hg38_correlation$status)
hg38.col <- gsub("\\.\\/","",gsub("_correlation_final.rda","_hg38",f))
colnames(tab) <- c("Status",hg38.col)
correlation.status <- full_join(correlation.status, tab)
}
# Each correlation file has pairs of gene probes (max distance 1500bp)
# correlation values (spearman pvalue, rho and status)
# we read it and select only Strongly negatively correlated (SNC)
SNC.type <- NULL
phantom <- NULL
files <- dir(pattern = "correlation_final.rda", recursive = T, full.names = T)
for(f in files){
message(f)
load(f)
if(!any(hg19_correlation$status == "Strongly negatively correlated (SNC)")) next
hg19 <- hg19_correlation[hg19_correlation$status == "Strongly negatively correlated (SNC)",]
paired.genes <- unique(hg19$ensembl_gene_id)
tab <- plyr::count(hg38_correlation[match(paired.genes, hg38_correlation$ensembl_gene_id),]$group_type)
phantom.hg19 <- c(sum(table(hg19[!duplicated(hg19$probe),]$Phantom)),sum(is.na(hg19[!duplicated(hg19$probe),]$Phantom)))
hg19.col <- gsub("\\.\\/","", gsub("_correlation_final.rda","_hg19",f))
colnames(tab) <- c("Gene_group_type", hg19.col)
if(is.null(SNC.type)) {
SNC.type <- tab
} else {
SNC.type <- full_join(SNC.type, tab)
}
aux <- hg38_correlation[hg38_correlation$status == "Strongly negatively correlated (SNC)",]
tab <- plyr::count(aux[!duplicated(aux$ensembl_gene_id),]$group_type)
hg38.col <- gsub("\\.\\/", "", gsub("_correlation_final.rda","_hg38",f))
colnames(tab) <- c("Gene_group_type", hg38.col)
SNC.type <- full_join(SNC.type, tab)
}
# Each correlation file has pairs of gene probes (max distance 1500bp)
# correlation values (spearman pvalue, rho and status)
# we read it and select only Weakly negatively correlated (WNC)
WNC.type <- NULL
files <- dir(pattern = "correlation_final.rda", recursive = T, full.names = T)
for(f in files){
load(f)
if(!any(hg19_correlation$status == "Weakly negatively correlated (WNC)")) next
paired.genes <- unique(hg19_correlation[hg19_correlation$status == "Weakly negatively correlated (WNC)",]$ensembl_gene_id)
tab <- plyr::count(hg38_correlation[match(paired.genes, hg38_correlation$ensembl_gene_id),]$group_type)
hg19.col <- gsub("\\.\\/","", gsub("_correlation_final.rda","_hg19",f))
colnames(tab) <- c("Gene_group_type", hg19.col)
if(is.null(WNC.type)) {
WNC.type <- tab
} else {
WNC.type <- full_join(WNC.type, tab)
}
aux <- hg38_correlation[hg38_correlation$status == "Weakly negatively correlated (WNC)",]
tab <- plyr::count(aux[!duplicated(aux$ensembl_gene_id),]$group_type)
hg38.col <- gsub("\\.\\/", "", gsub("_correlation_final.rda","_hg38",f))
colnames(tab) <- c("Gene_group_type", hg38.col)
WNC.type <- full_join(WNC.type, tab)
}
colnames(correlation.status)[-1] <- paste0(colnames(correlation.status)[-1] ,"_all")
colnames(SNC.type)[-1] <- paste0(colnames(SNC.type)[-1] ,"_all")
colnames(WNC.type)[-1] <- paste0(colnames(WNC.type)[-1] ,"_all")
WNC.type.all <- WNC.type
SNC.type.all <- SNC.type
correlation.status.all <- correlation.status
WNC.type <- full_join(WNC.type.negative, WNC.type.all)
SNC.type <- full_join(SNC.type.negative, SNC.type.all)
correlation.status <- full_join(correlation.status.negative,correlation.status.all)
save(WNC.type,SNC.type,correlation.status,file = "plot_new.rda")
This section will evaluate the transcript type for all associations and for each tumor type their status and fthe most significant anti-correlated associations (SNC and WNC) the transcript type for all associated genes.
Using the function getTCGA
from ELMER that uses TCGAbiolinks to download GDC data, we will download all RNA-seq and DNA methylation into a Data folder. This data will be used later to create a MultiAssayExperiment
object that will only keeps samples with both DNA methylation and gene expression.
library(ELMER)
library(TCGAbiolinks)
root <- "/mnt/home/tiagochst/paper_elmer/GDAN/"
setwd(root)
# get TCGA projects
projects <- TCGAbiolinks:::getGDCprojects()$project_id
projects <- projects[grepl('^TCGA', projects, perl = TRUE)]
# Download RNA-seq and DNA methylation for hg19 and hg38
for(proj in gsub("TCGA-", "", projects)){
for(genome in c("hg38","hg19")){
getTCGA(disease = proj, # TCGA disease abbreviation (BRCA, BLCA, GBM, LGG, etc)
basedir = "Data", # Where data will be downloaded
genome = genome) # Genome of reference "hg38" or "hg19"
}
}
library(doParallel)
library(parallel)
library(dplyr)
library(ELMER)
library(SummarizedExperiment)
library(TCGAbiolinks)
library(readr)
root <- "/mnt/home/tiagochst/paper_elmer/GDAN/"
setwd(root)
# Parallelizing code
cores <- 2
registerDoParallel(cores)
file <- "associated.genes_id.rda"
associated.genes_id <- NULL
if(file.exists(file)) {
associated.genes_id <- unique(get(load(file)))
associated.genes_id$distance <- NULL
associated.genes_id <- unique(associated.genes_id)
}
tumors <- TCGAbiolinks:::getGDCprojects()$project_id
tumors <- tumors[grepl('^TCGA', tumors, perl = TRUE)]
for(tumor in tumors){
print(tumor)
if(file.exists(paste0(paste0(tumor,"/hg38/"),tumor,"_hg38_correlation.csv"))) next
dir.create(paste0(tumor,"/hg38/"),showWarnings = FALSE,recursive = T)
met <- get(load(paste0(root,"/Data/",tumor,"/",tumor,"_meth_hg38_no_filter.rda")))
rna <- get(load(paste0(root,"/Data/",tumor,"/",tumor,"_RNA_hg38.rda")))
gene.metadata <- values(rna)[,1:2]
met <- met[rowRanges(met)$Gene_Symbol != ".",]
if(is.null(associated.genes_id)){
associated.genes_id <- plyr::adply(.data = values(met),
.margins = 1,
.fun = function(x){
genes <- x["Gene_Symbol"] %>% stringr::str_split(";") %>% unlist
groups <- x["Gene_Type"] %>% stringr::str_split(";") %>% unlist
distance <- x["Position_to_TSS"] %>% stringr::str_split(";") %>% unlist
idx <- which(abs(as.numeric(distance)) <= 1500)
data.frame(gene = genes[idx],
group_type = groups[idx],
distance = distance[idx],
probe = rep(x$Composite.Element.REF,
length(idx)))
},.id = NULL,.expand = T,.progress = T,.parallel = T)
save(associated.genes_id, file = "associated.genes_id_not_merged_max_dist_1500.rda")
tss <- ELMER:::getTSS(genome = "hg38")
idx <- !associated.genes_id$gene %in% tss$external_gene_name
associated.genes_id$gene[idx] <- as.character(HGNChelper::checkGeneSymbols(associated.genes_id$gene[idx])$Suggested.Symbol)
associated.genes_id <- unique(
merge(associated.genes_id,
unique(tss[,c("external_gene_name","ensembl_gene_id")]),
by.x = "gene",
by.y = "external_gene_name")
)
save(associated.genes_id,file = file)
}
# This will keep samples with the both DNA methylation and gene expression
# Take the log2(exp + 1)
# Put samples in the right order
# Keep only probes in 2KB close to TSS
# Remove probes that have NA for more than 50% of samples
mae.file <- paste0(paste0(tumor),"/",tumor,"_mae_hg38.rda")
if(file.exists(mae.file)) {
mae <- get(load(mae.file))
} else {
mae <- createMAE(exp = rna,
met = met[rownames(met) %in% associated.genes_id$probe,],
TCGA = TRUE,
linearize.exp = T,
save = T,
save.filename = paste0(tumor,"/",tumor,"_mae_hg38.rda"),
met.platform = "450K",
genome = "hg38",
met.na.cut = 0.5)
}
correlations <- plyr::adply(unique(associated.genes_id[,3:4]),
.margins = 1,
.fun = function(x){
GeneID <- as.character(x$ensembl_gene_id)
probe <- as.character(x$probe)
if(!probe %in% names(getMet(mae))) {
return(tibble::tibble(rho = NA,
pval = NA,
probe,
ensembl_gene_id = GeneID))
}
cor <- cor.test(x = as.numeric(assay(getMet(mae)[probe])),
y = as.numeric(assay(getExp(mae)[GeneID])),
method = c("spearman"))
corval <- as.numeric(cor$estimate)
pvalue <- as.numeric(cor$p.value)
df <- tibble::tibble(rho = corval,
pval = pvalue,
probe,
ensembl_gene_id = GeneID)
return(df)
},.id = NULL,.expand = T,.progress = "text",.parallel = F)
#correlations <- correlations[!is.na(correlations$rho),] # Remove associations not found
correlations$rho <- as.numeric(as.character(correlations$rho))
correlations$pval <- as.numeric(as.character(correlations$pval))
correlations$FDR <- p.adjust(as.numeric(as.character(correlations$pval)),method = "BH")
# If FDR > 0.05 - Insignificant
# If FDR <= 0.05:
# - Strongly negatively correlated (SNC) when the rho value was less than -0.5;
# - Weakly negatively correlated (WNC) when the rho value was between -0.5 and -0.25;
# - No negative correlation (NNC) when the rho value was greater than -0.25.
correlations$status <- "Insignificant"
correlations$status[correlations$FDR <= 0.05 & as.numeric(correlations$rho) >= -0.25] <- "No negative correlation"
correlations$status[correlations$FDR <= 0.05 & correlations$rho < -0.25 & correlations$rho > -0.5] <- "Weakly negatively correlated (WNC)"
correlations$status[correlations$FDR <= 0.05 & correlations$rho <= -0.5] <- "Strongly negatively correlated (SNC)"
correlations <- correlations[with(correlations,order(rho,pval)),]
# save
readr::write_csv(as.data.frame(correlations),
path = paste0(paste0(tumor,"/hg38/"),"/",tumor,"_hg38_correlation.csv"))
aux <- left_join(as_tibble(correlations),
unique(as_tibble(associated.genes_id)))
readr::write_csv(aux,
path = paste0(paste0(tumor,"/hg38/"),"/",tumor,"_hg38_correlation_all_info.csv"))
}
library(doParallel)
library(parallel)
library(dplyr)
library(ELMER)
library(SummarizedExperiment)
library(TCGAbiolinks)
root <- "/mnt/home/tiagochst/paper_elmer/GDAN/"
setwd(root)
# Illumina manifest
# Source: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GPL13534
metadata.hg19 <- readr::read_tsv(file = paste0(root,"/450k-manifest-GPL13534-11288.txt"),skip = 37)
metadata.hg19 <- metadata.hg19[!is.na(metadata.hg19$UCSC_RefGene_Name),]
metadata.hg19 <- metadata.hg19[grep("TSS1500|TSS200",metadata.hg19$UCSC_RefGene_Group),]
file <- "associated.genes_id_hg19.rda"
associated.genes_id.hg19 <- NULL
if(file.exists(file)) {
associated.genes_id.hg19 <- get(load(file))
colnames(associated.genes_id.hg19)[3] <- "probe"
}
if(is.null(associated.genes_id.hg19)){
associated.genes_id.hg19 <- plyr::adply(.data = metadata.hg19,
.margins = 1,
.fun = function(x){
genes <- x["UCSC_RefGene_Name"] %>% stringr::str_split(";") %>% unlist
groups <- x["UCSC_RefGene_Group"] %>% stringr::str_split(";") %>% unlist
idx <- grep("TSS",groups)
data.frame(gene = genes[idx],group_type = groups[idx])
},.id = NULL,.expand = T,.progress = T,.parallel = TRUE)
# get all LOC gene names and update them there are not in ENSEMBL database with LOC
symbols <- gsub("LOC","",unique(grep("^LOC",associated.genes_id.hg19$gene,value = T)))
mapping <- select(org.Hs.eg.db, symbols, c("ENTREZID","GENENAME","ENSEMBL","SYMBOL"))
# Replace LOC by gene names. LOC should be removed as this gene names are LOC + entrez ID
aux <- associated.genes_id.hg19
idx <- grep("^LOC",aux$gene)
aux$gene[idx] <- as.character(mapping$SYMBOL[match(gsub("LOC","",aux$gene[idx]),mapping$ENTREZID)])
idx <- !aux$gene %in% tss$external_gene_name
aux$gene[idx] <- HGNChelper::checkGeneSymbols(aux$gene[idx])$Suggested.Symbol
associated.genes_id.hg19 <- aux
associated.genes_id.hg19 <- merge(associated.genes_id.hg19,
values(rna)[,1:2],
by.x = "gene",
by.y = "external_gene_name")
save(associated.genes_id.hg19,file = file)
}
library(doParallel)
library(parallel)
library(dplyr)
library(ELMER)
cores <- 2
registerDoParallel(cores)
tumors <- TCGAbiolinks:::getGDCprojects()$project_id
tumors <- tumors[grepl('^TCGA', tumors, perl = TRUE)]
for(tumor in tumors){
print(tumor)
if(file.exists(paste0(paste0(tumor,"/hg19/"),"/",tumor,"_hg19_correlation.csv"))) next
dir.create(paste0(tumor,"/hg19/"),showWarnings = FALSE,recursive = TRUE)
met <- get(load(paste0(root,"/Data/",tumor,"/",tumor,"_meth_hg38_no_filter.rda")))
met <- met[unique(associated.genes_id.hg19$ID),]
rna <- get(load(paste0(root,"/Data/",tumor,"/",tumor,"_RNA_hg38.rda")))
# This will keep samples with the both DNA methylation and gene expression
# Take the log2(exp + 1)
# Put samples in the right order
# Keep only probes in 2KB close to TSS
# Remove probes that have NA for more than 50% of samples
dir.create(paste0(paste0(tumor,"/hg19/")),showWarnings = F,recursive = T)
mae.file <- paste0(paste0(tumor,"/hg19/"),"/",tumor,"_mae_hg19.rda")
if(file.exists(mae.file)) {
mae <- get(load(mae.file))
} else {
mae <- createMAE(exp = rna,
met = met,
TCGA = TRUE,
linearize.exp = T,
save = T,
save.filename = mae.file,
met.platform = "450K",
genome = "hg19",
met.na.cut = 0.5)
save(mae, file = mae.file)
}
correlations <- plyr::adply(associated.genes_id.hg19,
.margins = 1,
.fun = function(x){
GeneID <- as.character(x$ensembl_gene_id)
probe <- as.character(x$probe)
if(!probe %in% names(getMet(mae))) {
return(tibble::tibble(rho = NA, pval = NA, ensembl_gene_id = GeneID, probe))
}
cor <- cor.test(x = as.numeric(assay(getMet(mae)[probe])),
y = as.numeric(assay(getExp(mae)[GeneID])),
method = c("spearman"))
corval <- as.numeric(cor$estimate)
pvalue <- as.numeric(cor$p.value)
tibble::tibble(rho = corval,pval = pvalue, ensembl_gene_id = GeneID, probe)
},.id = NULL,
.expand = T,
.progress = "text",
.parallel = T)
#correlations <- correlations[!is.na(correlations$rho),] # Remove associations not found
correlations$rho <- as.numeric(as.character(correlations$rho))
correlations$pval <- as.numeric(as.character(correlations$pval))
correlations$FDR <- p.adjust(as.numeric(as.character(correlations$pval)),method = "BH")
# If FDR > 0.05 - Insignificant
# If FDR <= 0.05:
# - Strongly negatively correlated (SNC) when the rho value was less than -0.5;
# - Weakly negatively correlated (WNC) when the rho value was between -0.5 and -0.25;
# - No negative correlation (NNC) when the rho value was greater than -0.25.
correlations <- correlations[!is.na( as.numeric(correlations$rho)),]
correlations$status <- "Insignificant"
correlations[correlations$FDR <= 0.05 & as.numeric(correlations$rho) >= -0.25,]$status <- "No negative correlation"
correlations[correlations$FDR <= 0.05 & correlations$rho < -0.25 & correlations$rho > -0.5,]$status <- "Weakly negatively correlated (WNC)"
correlations[correlations$FDR <= 0.05 & correlations$rho <= -0.5,]$status <- "Strongly negatively correlated (SNC)"
correlations <- correlations[with(correlations,order(rho,pval)),]
# save
readr::write_csv(unique(as.data.frame(correlations)),
path = paste0(paste0(tumor,"/hg19/"),"/",tumor,"_hg19_correlation.csv"))
}
library(ELMER)
#
# Plot DNA methylation vs Gene expression
#
ntop <- 100 # plot the top correlations
tumors <- TCGAbiolinks:::getGDCprojects()$project_id
tumors <- tumors[grepl('^TCGA', tumors, perl = TRUE)]
# hg38
files <- dir(pattern = "correlation.csv", recursive = T, full.names = T)
for(tumor in tumors){
mae.file <- grep(tumor,dir(pattern = "mae_hg38", recursive = T, full.names = T), value = TRUE)
if(length(mae.file) == 0) next
load(mae.file)
f <- sort(grep(tumor, files,value = T))
hg38_correlation <- read_csv(f[2])
colnames(hg38_correlation)[grep("ENSG", hg38_correlation[1,])] <- "ensembl_gene_id"
for(i in 1:ntop){
library(ELMER)
dir.create(paste0(tumor,"/hg38/plots/"), recursive = T)
scatter.plot(data = mae,
dir.out = paste0(tumor,"/hg38/plots/"),
save = TRUE,
byPair = list(probe = as.character(hg38_correlation$probe[i]),
gene = as.character(hg38_correlation$ensembl_gene_id[i])),
category = "definition")
}
}
# hg19
files <- dir(pattern = "correlation.csv", recursive = TRUE, full.names = TRUE)
for(tumor in tumors){
f <- sort(grep(tumor, files,value = T))
hg19_correlation <- read_csv(f[1])
colnames(hg19_correlation)[grep("cg",hg19_correlation[1,])] <- "probe"
mae.file <- grep(tumor,dir(pattern = "mae_hg19", recursive = TRUE, full.names = TRUE), value = TRUE)
if(length(mae.file) == 0) next
load(mae.file)
for(i in 1:ntop){
dir.create(paste0(tumor, "/hg19/plots/"), recursive = T)
scatter.plot(data = mae,
dir.out = paste0(tumor, "/hg19/plots/"),
save = TRUE,
byPair = list(probe = as.character(hg19_correlation$probe[i]),
gene = as.character(hg19_correlation$ensembl_gene_id[i])),
category = "definition")
}
}
library(readr)
library(dplyr)
root <- "/mnt/home/tiagochst/paper_elmer/GDAN/"
setwd(root)
load(file.path(root,"associated.genes_id_hg19.rda"))
load(file.path(root,"associated.genes_id.rda"))
colnames(associated.genes_id.hg19)[2] <- "probe"
associated.genes_id$distance <- NULL
associated.genes_id <- unique(associated.genes_id)
files <- dir(pattern = "correlation.csv",recursive = T,full.names = T)
tumors <- basename(dirname(dirname(files)))
snc.in.hg38.not.in.hg19 <- NULL
for(tumor in tumors){
print(tumor)
f <- sort(grep(tumor, files,value = T))
hg19_correlation <- read_csv(f[1])
colnames(hg19_correlation)[grep("cg",hg19_correlation[1,])] <- "probe"
hg38_correlation <- read_csv(f[2])
colnames(hg38_correlation)[grep("ENSG",hg38_correlation[1,])] <- "ensembl_gene_id"
hg19_correlation <- left_join(as_tibble(hg19_correlation),unique(as_tibble(associated.genes_id.hg19)))
hg38_correlation <- left_join(as_tibble(hg38_correlation),unique(as_tibble(associated.genes_id)))
save(hg19_correlation, hg38_correlation, file = paste0(tumor,"_correlation_final.rda"))
if(any(hg19_correlation$status == "Strongly negatively correlated (SNC)")){
y <- hg19_correlation[hg19_correlation$status == "Strongly negatively correlated (SNC)",]
z <- hg38_correlation[hg38_correlation$status == "Strongly negatively correlated (SNC)",]
z <- unique(z[!z$ensembl_gene_id %in% y$ensembl_gene_id,c(8:9)])
z$tumor <- tumor
snc.in.hg38.not.in.hg19 <- rbind(snc.in.hg38.not.in.hg19,z)
}
}
readr::write_csv(unique(as.data.frame(snc.in.hg38.not.in.hg19)), path = "snc_in_hg38_not_in_hg19.csv")
# Evaluating only negative
load(file.path(root, "associated.genes_id_hg19.rda"))
load(file.path(root, "associated.genes_id.rda"))
colnames(associated.genes_id.hg19)[2] <- "probe"
associated.genes_id <- associated.genes_id[as.numeric(as.character(associated.genes_id$distance)) < 0,]
associated.genes_id$distance <- NULL
associated.genes_id <- unique(associated.genes_id)
files <- dir(pattern = "correlation.csv",recursive = T,full.names = T)
tumors <- basename(dirname(dirname(files)))
snc.in.hg38.not.in.hg19 <- NULL
wnc.in.hg38.not.in.hg19 <- NULL
wnc.in.hg38.not.in.hg19.all <- NULL
snc.in.hg38.not.in.hg19.all <- NULL
for(tumor in tumors){
print(tumor)
f <- sort(grep(tumor, files,value = T))
hg19_correlation <- read_csv(f[1])
colnames(hg19_correlation)[grep("cg",hg19_correlation[1,])] <- "probe"
hg38_correlation <- read_csv(f[2])
colnames(hg38_correlation)[grep("ENSG",hg38_correlation[1,])] <- "ensembl_gene_id"
hg19_correlation <- left_join(as_tibble(hg19_correlation),unique(as_tibble(associated.genes_id.hg19)))
hg38_correlation <- left_join(as_tibble(hg38_correlation),unique(as_tibble(associated.genes_id)))
save(hg19_correlation,hg38_correlation, file = paste0(tumor,"_correlation_only_negative.rda"))
if(any(hg19_correlation$status == "Strongly negatively correlated (SNC)")){
y <- hg19_correlation[hg19_correlation$status == "Strongly negatively correlated (SNC)",]
z <- hg38_correlation[hg38_correlation$status == "Strongly negatively correlated (SNC)",]
z$tumor <- tumor
aux <- unique(z[!z$ensembl_gene_id %in% y$ensembl_gene_id,c(8:9)])
snc.in.hg38.not.in.hg19 <- rbind(snc.in.hg38.not.in.hg19,aux)
aux <- unique(z[!z$ensembl_gene_id %in% y$ensembl_gene_id,])
snc.in.hg38.not.in.hg19.all <- rbind(snc.in.hg38.not.in.hg19.all,aux)
}
if(any(hg19_correlation$status == "Weakly negatively correlated (WNC)")){
y <- hg19_correlation[hg19_correlation$status == "Weakly negatively correlated (WNC)",]
z <- hg38_correlation[hg38_correlation$status == "Weakly negatively correlated (WNC)",]
z$tumor <- tumor
aux <- unique(z[!z$ensembl_gene_id %in% y$ensembl_gene_id,c(8:9)])
wnc.in.hg38.not.in.hg19 <- rbind(wnc.in.hg38.not.in.hg19,aux)
aux <- unique(z[!z$ensembl_gene_id %in% y$ensembl_gene_id,])
wnc.in.hg38.not.in.hg19.all <- rbind(wnc.in.hg38.not.in.hg19.all,aux)
}
}
readr::write_csv(unique(as.data.frame(snc.in.hg38.not.in.hg19.all)),
path = "snc_in_hg38_negative_1500bp_not_in_hg19_negative_1500bp_all_info.csv")
readr::write_csv(unique(as.data.frame(wnc.in.hg38.not.in.hg19.all)),
path = "wnc_in_hg38_negative_1500bp_not_in_hg19_negative_1500bp_all_info.csv")
readr::write_csv(unique(as.data.frame(snc.in.hg38.not.in.hg19)),
path = "snc_in_hg38_negative_1500bp_not_in_hg19_negative_1500bp.csv")
readr::write_csv(unique(as.data.frame(wnc.in.hg38.not.in.hg19)),
path = "wnc_in_hg38_negative_1500bp_not_in_hg19_negative_1500bp.csv")
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.14.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid parallel stats4 stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] VennDiagram_1.6.20 futile.logger_1.4.3 plyr_1.8.4
## [4] reshape2_1.4.3 tidyr_0.8.3 htmltools_0.3.6
## [7] plotly_4.8.0 ggpubr_0.2 magrittr_1.5
## [10] ggplot2_3.1.0 knitr_1.21 dplyr_0.8.0.1
## [13] ELMER_2.6.1 ELMER.data_2.6.0 TCGAbiolinks_2.11.4
## [16] S4Vectors_0.20.1 BiocGenerics_0.28.0
##
## loaded via a namespace (and not attached):
## [1] R.utils_2.8.0 tidyselect_0.2.5
## [3] RSQLite_2.1.1 AnnotationDbi_1.44.0
## [5] htmlwidgets_1.3 BiocParallel_1.16.5
## [7] DESeq_1.34.1 munsell_0.5.0
## [9] codetools_0.2-16 preprocessCore_1.44.0
## [11] DT_0.5 withr_2.1.2
## [13] colorspace_1.4-0 Biobase_2.42.0
## [15] rstudioapi_0.9.0 labeling_0.3
## [17] GenomeInfoDbData_1.2.0 hwriter_1.3.2
## [19] KMsurv_0.1-5 bit64_0.9-7
## [21] downloader_0.4 generics_0.0.2
## [23] lambda.r_1.2.3 xfun_0.5
## [25] biovizBase_1.30.1 ggthemes_4.1.0
## [27] randomForest_4.6-14 EDASeq_2.16.2
## [29] R6_2.4.0 doParallel_1.0.14
## [31] GenomeInfoDb_1.18.1 locfit_1.5-9.1
## [33] AnnotationFilter_1.6.0 bitops_1.0-6
## [35] reshape_0.8.8 DelayedArray_0.8.0
## [37] assertthat_0.2.0 promises_1.0.1
## [39] scales_1.0.0 nnet_7.3-12
## [41] gtable_0.2.0 sva_3.30.1
## [43] wheatmap_0.1.0 sesameData_1.0.0
## [45] ensembldb_2.6.3 rlang_0.3.1
## [47] genefilter_1.64.0 cmprsk_2.2-7
## [49] GlobalOptions_0.1.0 splines_3.5.1
## [51] rtracklayer_1.42.1 lazyeval_0.2.1
## [53] acepack_1.4.1 dichromat_2.0-0
## [55] selectr_0.4-1 broom_0.5.1
## [57] checkmate_1.9.0 BiocManager_1.30.4
## [59] yaml_2.2.0 crosstalk_1.0.0
## [61] GenomicFeatures_1.34.1 backports_1.1.3
## [63] httpuv_1.4.5.1 Hmisc_4.1-1
## [65] tools_3.5.1 RColorBrewer_1.1-2
## [67] DNAcopy_1.56.0 MultiAssayExperiment_1.8.1
## [69] Rcpp_1.0.0 base64enc_0.1-3
## [71] progress_1.2.0 zlibbioc_1.28.0
## [73] purrr_0.3.1 RCurl_1.95-4.12
## [75] prettyunits_1.0.2 rpart_4.1-13
## [77] GetoptLong_0.1.7 zoo_1.8-4
## [79] SummarizedExperiment_1.12.0 ggrepel_0.8.0
## [81] cluster_2.0.7-1 futile.options_1.0.1
## [83] data.table_1.12.0 circlize_0.4.5
## [85] survminer_0.4.3 ProtGenerics_1.14.0
## [87] matrixStats_0.54.0 aroma.light_3.12.0
## [89] hms_0.4.2 mime_0.6
## [91] evaluate_0.13 xtable_1.8-3
## [93] XML_3.98-1.18 IRanges_2.16.0
## [95] gridExtra_2.3 shape_1.4.4
## [97] compiler_3.5.1 biomaRt_2.38.0
## [99] tibble_2.0.1 crayon_1.3.4
## [101] R.oo_1.22.0 mgcv_1.8-26
## [103] later_0.7.5 Formula_1.2-3
## [105] geneplotter_1.60.0 DBI_1.0.0
## [107] formatR_1.5 ExperimentHub_1.8.0
## [109] matlab_1.0.2 ComplexHeatmap_1.99.5
## [111] ShortRead_1.40.0 Matrix_1.2-15
## [113] readr_1.3.1 R.methodsS3_1.7.1
## [115] Gviz_1.26.4 GenomicRanges_1.34.0
## [117] pkgconfig_2.0.2 km.ci_0.5-2
## [119] sesame_1.0.0 GenomicAlignments_1.18.1
## [121] foreign_0.8-71 xml2_1.2.0
## [123] foreach_1.4.4 annotate_1.60.0
## [125] XVector_0.22.0 rvest_0.3.2
## [127] stringr_1.4.0 VariantAnnotation_1.28.7
## [129] digest_0.6.18 ConsensusClusterPlus_1.46.0
## [131] Biostrings_2.50.2 rmarkdown_1.11
## [133] survMisc_0.5.5 htmlTable_1.13.1
## [135] edgeR_3.24.3 curl_3.3
## [137] shiny_1.2.0 Rsamtools_1.34.0
## [139] rjson_0.2.20 nlme_3.1-137
## [141] jsonlite_1.6 viridisLite_0.3.0
## [143] limma_3.38.3 BSgenome_1.50.0
## [145] pillar_1.3.1 lattice_0.20-38
## [147] httr_1.4.0 survival_2.43-3
## [149] interactiveDisplayBase_1.20.0 glue_1.3.0
## [151] iterators_1.0.10 bit_1.1-14
## [153] stringi_1.3.1 blob_1.1.1
## [155] AnnotationHub_2.14.2 latticeExtra_0.6-28
## [157] memoise_1.1.0