library(dplyr)
library(minfi)
cohort <- "GASPARONI"
data.dir <- file.path("DATASETS/",cohort,"/")
data.dir.table <- "DATASETS/Summary_Table/"
data.dir.raw <- file.path(data.dir,"/step1_download/")
data.dir.read <- file.path(data.dir,"/step2_read_minfi/")
data.dir.bsfilter <- file.path(data.dir,"/step3_bsConvFilter/")
data.dir.clinical.filter <- file.path(data.dir,"/step4_clinical_available_filtering/")
data.dir.probes.qc <- file.path(data.dir,"/step5_probesQC_filtering/")
data.dir.probes.normalization <- file.path(data.dir,"/step6_normalization/")
data.dir.pca <- file.path(data.dir,"/step7_pca_filtering/")
data.dir.neuron <- file.path(data.dir,"/step8_neuron_comp/")
data.dir.single.cpg.pval <- file.path(data.dir,"/step9_single_cpg_pval/")
data.dir.residuals <- file.path(data.dir,"/step10_residuals/")
data.dir.median <- file.path(data.dir,"/step11_median/")
for(p in grep("dir",ls(),value = T)) dir.create(get(p),recursive = TRUE,showWarnings = FALSE)
Required R libraries: GEOquery
can be installed as following:
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("GEOquery")
Data from: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE66351
library(GEOquery)
library(SummarizedExperiment)
test <- getGEO(GEO = "GSE66351",destdir = data.dir.raw)
se <- test$GSE66351_series_matrix.txt.gz %>% makeSummarizedExperimentFromExpressionSet()
se <- se[,se$source_name_ch1 == "Frontal Cortex" &
se$characteristics_ch1 == "cell type: bulk" &
se$braak_stage.ch1 != "NA"]
We will download Idat files for all our samples
files <- c(se$supplementary_file %>% as.character(),
se$supplementary_file.1 %>% as.character())
dir.create(data.dir.raw,showWarnings = FALSE,recursive = TRUE)
plyr::a_ply(files,.margins = 1,.fun = function(url){
out.file <- file.path(data.dir.raw,basename(url))
if(!file.exists(out.file)) downloader::download(url,out.file)
gunzip(out.file)
},.progress = "time")
Description:
Input:
Output:
RGSet <- read.metharray.exp(base = data.dir.raw,
recursive = TRUE,
verbose = TRUE) #dim: 622399 60
phenoData <- colData(se)
save(RGSet,phenoData, file = paste0(data.dir.read, "Gasparoni.rda"))
Removing samples with bisulfiteConversion lower than 88.
library(wateRmelon)
library(RPMM)
#library(sesame)
#library(sesameData)
load(file = paste0(data.dir.read, "Gasparoni.rda"))
bs <- data.frame(bisulfiteConversion = bscon(RGSet))
## Loading required package: IlluminaHumanMethylation450kmanifest
bsFilteredOut <- row.names(bs)[bs$bisulfiteConversion < 88]
nb.samples <- ncol(RGSet)
RGSet <- RGSet[,!colnames(RGSet) %in% bsFilteredOut]
nb.samples.bc.filtered <- ncol(RGSet)
phenoData <- phenoData[match(substr(colnames(RGSet),1,10), phenoData$geo_accession),]
save(RGSet,
nb.samples,
bs,
phenoData,
nb.samples.bc.filtered,
file = paste0(data.dir.bsfilter, "/RGSet_bsfiltered.rda"))
dim(phenoData)
## [1] 57 49
phenoData$braak_stage.ch1 <- phenoData$braak_stage.ch1 %>% as.numeric()
phenoData$age.ch1 <- phenoData$age.ch1 %>% as.numeric()
### Subset rows and columns
pheno_df <- phenoData %>% as.data.frame() %>%
dplyr::filter(
source_name_ch1 == "Frontal Cortex" &
!is.na(phenoData$braak_stage.ch1) &
phenoData$characteristics_ch1 == "cell type: bulk"
) %>% dplyr::select(
c(
"geo_accession",
"donor_id.ch1",
"sentrix_id.ch1",
"age.ch1",
"Sex.ch1",
"braak_stage.ch1"
)
)
### Rename vars
colnames(pheno_df) <- c(
"sample", "subject.id", "slide", "age.brain", "sex", "stage"
)
dim(pheno_df)
## [1] 57 6
nb.samples.with.clinical <- nrow(pheno_df)
## phenotype dataset
save(RGSet,
nb.samples.with.clinical,
pheno_df,
file = paste0(data.dir.clinical.filter, "/gasparoni_bs_and_clinical_filtered.rda"))
Input:
Output:
##### 1. subset to probes with detection P <= 0.01 #############################
### Read in RGSet and beta_mat
load(paste0(data.dir.clinical.filter, "/gasparoni_bs_and_clinical_filtered.rda"))
### subset to probes with detection P <= 0.01
library(minfi)
beta_mat <- getBeta(RGSet)
nb.probes <- nrow(beta_mat)
detP <- detectionP(RGSet, type = "m+u")
failed.01 <- detP > 0.01
passedProbes <- rownames(failed.01)[rowMeans(failed.01) == 0]
beta_mat <- beta_mat[passedProbes, ]
nb.probes.detectP <- nrow(beta_mat)
##### 2. keep only probes that start with "cg" #################################
beta_mat <- beta_mat[grep("cg",rownames(beta_mat)),]
dim(beta_mat)
## [1] 475979 57
nb.probes.detectP.cg <- nrow(beta_mat)
##### 3. drop probes that are on X/Y ###########################################
##### 4. drop probes where SNP with MAF >= 0.01 in the last 5 bp of the probe ##
library(DMRcate)
beta_mat <- rmSNPandCH(
object = beta_mat,
dist = 5,
mafcut = 0.01,
and = TRUE,
rmcrosshyb = FALSE,
rmXY = TRUE
)
nb.probes.cg.dmrcate <- nrow(beta_mat)
##### 5. Output datasets #######################################################
save(
pheno_df,
nb.probes.detectP,
nb.probes.detectP.cg,
nb.probes.cg.dmrcate,
beta_mat,
file = paste0(data.dir.probes.qc, "beta_CG_XY_SNPfiltered_mat.rda")
)
Input:
Output:
load(paste0(data.dir.probes.qc, "beta_CG_XY_SNPfiltered_mat.rda"))
library(lumi)
betaQN <- lumiN(x.lumi = beta_mat, method = "quantile")
## Perform quantile normalization ...
dim(betaQN)
## [1] 446792 57
library(wateRmelon)
library(RPMM)
library(sesame)
library(sesameData)
### BMIQ
set.seed (946)
doParallel::registerDoParallel(cores = 8)
betaQN_BMIQ <- plyr::aaply(
betaQN, 2,
function(x){
norm_ls <- BMIQ(x, design.v = type12, plots = FALSE)
return (norm_ls$nbeta)
},.progress = "time",.parallel = TRUE
) %>% t()
colnames(betaQN_BMIQ) <- substr(colnames(betaQN_BMIQ),1,stringr::str_length(pheno_df$sample) %>% unique)
save(betaQN_BMIQ, pheno_df, file = paste0(data.dir.probes.normalization, "GASPARONI_QNBMIQ.rda"))
Input:
Output:
# plotPCA and OrderDataBySd functions
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
## Sourcing https://gist.githubusercontent.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e/raw/a14424662da343c1301b7b2f03210d28d16ae05c/functions.R
## SHA-1 hash of file is ef6f39dc4e5eddb5ca1c6e5af321e75ff06e9362
### merge in group info
## add center and scale
## compare M values vs. beta values
## with and without center / scale
##### 0.Import datasets ########################################################
load(paste0(data.dir.probes.normalization, "GASPARONI_QNBMIQ.rda"))
identical(colnames(betaQN_BMIQ), pheno_df$sample)
## [1] TRUE
### transform to m values
mvalue_mat <- log2(betaQN_BMIQ / (1 - betaQN_BMIQ)) #dim: 437713 142
pheno_df <- subset(pheno_df, pheno_df$sample %in% colnames(betaQN_BMIQ)) #dim: 142 7
# both_df$stage <- as.numeric(as.character(both_df$stage))
pheno_df$stage3 <- pheno_df$stage
pheno_df$stage3[pheno_df$stage <= 2] <- '0-2'
pheno_df$stage3[pheno_df$stage > 2 & pheno_df$stage < 5] <- '3-4'
pheno_df$stage3[pheno_df$stage >= 5] <- '5-6'
##### 1.Order matrix by most variable probes on top ############################
betaOrd_mat <- OrderDataBySd(betaQN_BMIQ) #dim: 391482 142
mOrd_mat <- OrderDataBySd(mvalue_mat) #dim: 391482 142
betaOrd_matPlot <- betaOrd_mat[, pheno_df$sample] #dim: 391482 142
mOrd_matPlot <- mOrd_mat[, pheno_df$sample] #dim: 391482 142
identical(pheno_df$sample, colnames(betaOrd_matPlot))
## [1] TRUE
identical(pheno_df$sample, colnames(mOrd_matPlot))
## [1] TRUE
expSorted_mat = betaOrd_mat #dim: 391482 142
pca <- prcomp(t(expSorted_mat[1:50000, ]),
center = TRUE,
scale = TRUE)
d <- data.frame(PC1 = pca$x[, 1], PC2 = pca$x[, 2])
meanPC1 <- mean (d$PC1)
sdPC1 <- sd (d$PC1)
meanPC2 <- mean (d$PC2)
sdPC2 <- sd (d$PC2)
out3sdPC1_1 <- meanPC1 - 3 * sdPC1
out3sdPC1_2 <- meanPC1 + 3 * sdPC1
out3sdPC2_1 <- meanPC2 - 3 * sdPC2
out3sdPC2_2 <- meanPC2 + 3 * sdPC2
d$outlier_PC1[d$PC1 >= out3sdPC1_1 & d$PC1 <= out3sdPC1_2] <- 0
d$outlier_PC1[d$PC1 < out3sdPC1_1 | d$PC1 > out3sdPC1_2] <- 1
d$outlier_PC2[d$PC2 >= out3sdPC2_1 & d$PC2 <= out3sdPC2_2] <- 0
d$outlier_PC2[d$PC2 < out3sdPC2_1 | d$PC2 > out3sdPC2_2] <- 1
readr::write_csv(d, paste0(data.dir.pca, "GASPARONI_PCs_usingBetas.csv"))
##### 2.Filter samples by PCA, save files ######################################
noOutliers <- d[which(d$outlier_PC1 == 0 & d$outlier_PC2 == 0), ]
betaQN_BMIQ_PCfiltered <- betaQN_BMIQ[, rownames(noOutliers)] #dim: 433656 59
saveRDS(betaQN_BMIQ_PCfiltered, paste0(data.dir.pca, "Gasparoni_QNBMIQ_PCfiltered.RDS"))
pheno_df <- pheno_df[pheno_df$sample %in% rownames(noOutliers),] #dim: 59 6
saveRDS(pheno_df, paste0(data.dir.pca, "pheno_df.RDS"))
nb.samples.with.clinical.after.pca <- ncol(betaQN_BMIQ_PCfiltered)
dim(betaQN_BMIQ_PCfiltered)
## [1] 446792 56
dim(pheno_df)
## [1] 56 7
pheno_df %>%
DT::datatable(filter = 'top',
style = "bootstrap",
extensions = 'Buttons',
options = list(scrollX = TRUE,
dom = 'Bfrtip',
buttons = I('colvis'),
keys = TRUE,
pageLength = 10),
rownames = FALSE,
caption = "Samples metadata")
df.samples <- data.frame("Number of samples" = c(nb.samples,
nb.samples.bc.filtered,
nb.samples.with.clinical,
nb.samples.with.clinical.after.pca),
"Description" = c("total number of samples",
"samples with bisulfate conversion > 88",
"samples with clinical data",
"Samples after PCA"),
"Difference" = c("-",
nb.samples.bc.filtered - nb.samples ,
nb.samples.with.clinical - nb.samples.bc.filtered ,
nb.samples.with.clinical.after.pca - nb.samples.with.clinical)
)
df.samples
# Create summary table
df.probes <- data.frame("Number of probes" = c(nb.probes,
nb.probes.detectP,
nb.probes.detectP.cg,
nb.probes.cg.dmrcate),
"Description" = c("total number of probes in raw data",
"detection P < 0.01",
"only probes that start with cg",
"DMRcate"),
"Difference" = c("-",
nb.probes.detectP - nb.probes ,
nb.probes.detectP.cg - nb.probes.detectP,
nb.probes.cg.dmrcate - nb.probes.detectP.cg)
)
df.probes
save(df.samples,df.probes,file = file.path(data.dir.table, "GASPARONI_table.rda"))
Data from https://www.tandfonline.com/doi/full/10.4161/epi.23924
Input: Gasparoni_QNBMIQ_PCfiltered.RDS, pheno_df.RDS
Output: pheno_withNeuronProp_df.RDS
objects <- load("../../CET/CETS_Image.RData")
objects
## [1] "a" "a1" "b" "b1"
## [5] "brain" "c" "dat" "deconvolute"
## [9] "dilution" "estProportion" "f" "g2"
## [13] "getF" "getReference" "i" "idx"
## [17] "modelIdx" "p" "pdBrain" "prop"
## [21] "r2" "refProfile" "res" "rowttests"
## [25] "variables"
idx <- list(
controlNeuron = pdBrain$celltype == "N" & pdBrain$diag == "Control" & pdBrain$ethnicity == "Caucasian",
controlGlia = pdBrain$celltype == "G" & pdBrain$diag == "Control" & pdBrain$ethnicity == "Caucasian"
)
refProfile <- getReference(brain, idx)
##### 2. Estimate proportions of neurons in PFC samples ########################
### Limit to 10,000 cpgs in the refProfile dataset
pfc <- readRDS(paste0(data.dir.pca, "Gasparoni_QNBMIQ_PCfiltered.RDS")) #dim: 433656 59
selected <- rownames(pfc) %in% rownames(refProfile)
pfc.refcpgs <- pfc[selected, ]
### Estimate proportion of neurons
prop <- data.frame(estProportion(pfc.refcpgs, profile = refProfile))
colnames(prop) <- "prop.neuron"
##### 3. Merge pfc.refcpgs with phenotype file #################################
pheno <- readRDS(paste0(data.dir.pca, "pheno_df.RDS"))
pheno_final <- merge(
pheno,
prop,
by.x = "sample",
by.y = "row.names"
)
saveRDS(pheno_final, paste0(data.dir.neuron, "pheno_withNeuronProp_df.RDS"))
Input:
Output:
beta_mat <- readRDS(paste0(data.dir.pca, "Gasparoni_QNBMIQ_PCfiltered.RDS"))
pheno_df <- readRDS(paste0(data.dir.neuron, "pheno_withNeuronProp_df.RDS"))
### Compute M values
mval_mat <- log2(beta_mat / (1 - beta_mat))
pheno_df$Sample <- pheno_df$sample
identical(pheno_df$Sample, colnames(mval_mat))
## [1] TRUE
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$slide <- as.factor(pheno_df$slide)
# If rosmap cohort, don't forget batch effect
str(pheno_df)
## 'data.frame': 56 obs. of 9 variables:
## $ sample : chr "GSM2808939" "GSM2808940" "GSM2808941" "GSM2808942" ...
## $ subject.id : chr "RZ052" "RZ055" "RZ057" "RZ058" ...
## $ slide : Factor w/ 6 levels "5854945010","5854945021",..: 3 3 3 3 3 3 3 3 4 4 ...
## $ age.brain : num 82 80 88 77 75 73 67 74 53 18 ...
## $ sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 2 1 2 2 2 ...
## $ stage : num 5 5 2 2 2 6 6 6 0 0 ...
## $ stage3 : chr "5-6" "5-6" "0-2" "0-2" ...
## $ prop.neuron: num 0.374 0.403 0.431 0.24 0.343 ...
## $ Sample : chr "GSM2808939" "GSM2808940" "GSM2808941" "GSM2808942" ...
is(pheno_df$stage,"numeric")
## [1] TRUE
is(pheno_df$age.brain,"numeric")
## [1] TRUE
is(pheno_df$prop.neuron,"numeric")
## [1] TRUE
predictors_char <- "stage"
covariates_char <- c("age.brain", "sex", "prop.neuron", "slide")
doParallel::registerDoParallel(cores = parallel::detectCores()/2)
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
results_ordered_df <- plyr::adply(mval_mat,1, function(row){
sumOneRegion_df <- data.frame(t(row))
result <- TestSingleRegion(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
sumOneRegion_df = sumOneRegion_df
)
result
}, .progress = "time",.parallel = TRUE,.id = "cpg")
colnames(results_ordered_df)[1] <- "cpg"
identical(row.names(mval_mat), results_ordered_df$cpg %>% as.character())
results_ordered_df$fdr <- p.adjust(
results_ordered_df$pValue,
method = "fdr"
)
write.csv(
results_ordered_df,
paste0(data.dir.single.cpg.pval, "Gasparoni_single_cpg_pVal_df.csv"),
row.names = FALSE
)
results_ordered_df <- readr::read_csv(
paste0(data.dir.single.cpg.pval, "Gasparoni_single_cpg_pVal_df.csv"),
col_types = readr::cols())
results_ordered_df
Input:
Output:
##### 1. Import datasets #######################################################
beta_mat <- readRDS(paste0(data.dir.pca, "Gasparoni_QNBMIQ_PCfiltered.RDS")) #dim:433656 59
pheno_df <- readRDS(paste0(data.dir.neuron, "pheno_withNeuronProp_df.RDS")) #dim:59 7
### Compute M values
mvalue_mat <- log2(beta_mat / (1 - beta_mat))
### Reorder samples based on pheno_df
mvalue_mat <- mvalue_mat[, pheno_df$sample]
identical(colnames(mvalue_mat), pheno_df$sample)
### Take residuals
lmF <- function(mval){
fitE <- lm(
as.numeric(mval) ~ age.brain + sex + prop.neuron + as.character(slide), #add batch if rosmap
data = pheno_df,
na.action = na.exclude
)
residuals (fitE)
}
doParallel::registerDoParallel(cores = 4)
resid <- plyr::adply(mvalue_mat,1,.fun = lmF,.progress = "time",.parallel = TRUE)
rownames(resid) <- resid[,1]
resid[,1] <- NULL
colnames(resid) <- colnames(mvalue_mat)
dim(resid)
dim(mvalue_mat)
### Save dataset
saveRDS(
resid,
paste0(data.dir.residuals, "Gasparoni_QNBMIQ_PCfiltered_mvalResiduals.RDS")
)
### Import datasets
mvalue_residuals_mat <- readRDS(
paste0(data.dir.residuals, "Gasparoni_QNBMIQ_PCfiltered_mvalResiduals.RDS")
)
### Call in functions
library(coMethDMR)
library(BiocParallel)
probes.cluster.all <- coMethDMR::getPredefinedCluster(arrayType = "450k",
clusterType = "regions")
ncores <- parallel::detectCores()/2
### Find co-methylated clusters
coMeth_ls <- CoMethAllRegions(
dnam = mvalue_residuals_mat,
betaToM = FALSE,
CpGs_ls = probes.cluster.all,
arrayType = "450k",
rDropThresh_num = 0.4,
minPairwiseCorr = NULL,
method = "spearman",
returnAllCpGs = TRUE,
output = "all",
nCores_int = ncores,
progressbar = FALSE
)
saveRDS(
coMeth_ls,
paste0(data.dir.residuals,"Gasparoni_residuals_cometh_input_ls.RDS")
)
Input:
Output:
### Import datasets
beta_mat <- readRDS(paste0(data.dir.pca,
"Gasparoni_QNBMIQ_PCfiltered.RDS"))
pheno_df <- readRDS(paste0(data.dir.neuron, "pheno_withNeuronProp_df.RDS"))
mval_mat <- log2(beta_mat / (1 - beta_mat)) %>% as.matrix()
coMeth_ls <- readRDS(
paste0(data.dir.residuals, "Gasparoni_residuals_cometh_input_ls.RDS")
)
### Create info dataset
input_cometh <- data.frame(
inputRegion = coMeth_ls$inputRegion_chr,
nCoMethRegion = coMeth_ls$nCoMethRegions_num,
coMethRegion = names(coMeth_ls$coMeth_ls),
nCpGs = unlist(lapply(coMeth_ls$coMeth_ls, length), use.names = FALSE),
stringsAsFactors = FALSE
)
input_cometh_nodup <- input_cometh[
!duplicated(input_cometh$coMethRegion),
]
colnames(input_cometh_nodup) <- c(
paste0(cohort, "_inputRegion"),
paste0(cohort, "_nCoMethRegion"),
paste0(cohort, "_coMethRegion"),
paste0(cohort, "_nCpGs")
)
saveRDS(
input_cometh_nodup,
paste0(data.dir.median, cohort,"_info_df.rds")
)
### Take median of probes in each cluster for each sample
filename <- paste0(paste0(data.dir.median, cohort, "_mediansMval_df.rds"))
library(robustbase)
mval_mat <- mval_mat[rownames(mval_mat) %in% unlist(coMeth_ls$coMeth_ls),]
if(!file.exists(filename)){
medianMval.df <- plyr::ldply(
coMeth_ls$coMeth_ls[!duplicated(names(coMeth_ls$coMeth_ls))],
function(probes){
colMedians(mval_mat[as.character(probes),], na.rm = TRUE)
},
.progress = "time"
)
medianMval.df$.id <- NULL
colnames(medianMval.df) <- colnames(mval_mat)
saveRDS(medianMval.df, file = filename)
} else {
medianMval.df <- readRDS(filename)
}
### Import datasets
cohort <- "Gasparoni"
info_df <- readRDS(dir(data.dir.median, pattern = "info", full.names = TRUE))
mediansMval_df <- readRDS(dir(data.dir.median, pattern = "mediansMval", full.names = TRUE))
pheno_df <- readRDS(paste0(data.dir.neuron, "pheno_withNeuronProp_df.RDS"))
### Check variables before fitting model
pheno_df$Sample <- pheno_df$sample
identical(pheno_df$Sample, colnames(mediansMval_df))
## [1] TRUE
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$slide <- as.factor(pheno_df$slide)
# If rosmap cohort, don't forget batch effect
str(pheno_df)
## 'data.frame': 56 obs. of 9 variables:
## $ sample : chr "GSM2808939" "GSM2808940" "GSM2808941" "GSM2808942" ...
## $ subject.id : chr "RZ052" "RZ055" "RZ057" "RZ058" ...
## $ slide : Factor w/ 6 levels "5854945010","5854945021",..: 3 3 3 3 3 3 3 3 4 4 ...
## $ age.brain : num 82 80 88 77 75 73 67 74 53 18 ...
## $ sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 2 1 2 2 2 ...
## $ stage : num 5 5 2 2 2 6 6 6 0 0 ...
## $ stage3 : chr "5-6" "5-6" "0-2" "0-2" ...
## $ prop.neuron: num 0.374 0.403 0.431 0.24 0.343 ...
## $ Sample : chr "GSM2808939" "GSM2808940" "GSM2808941" "GSM2808942" ...
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
predictors_char <- "stage"
covariates_char <- c("age.brain", "sex", "prop.neuron", "slide")
res_df <- TestAllRegions_noInfo(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
summarizedRegions_df = mediansMval_df
)
colnames(res_df) <- c(
paste0(cohort, "_estimate"),
paste0(cohort, "_se"),
paste0(cohort, "_pVal"),
paste0(cohort, "_fdr")
)
res_withInfo_df <- cbind(info_df, res_df)
saveRDS(
res_withInfo_df,
paste0(data.dir.median, cohort, "_linear_df.rds")
)
file <- dir(data.dir.median,pattern = paste0(".*linear_df"),
recursive = T,
full.names = TRUE,
ignore.case = T)
file
## [1] "DATASETS//GASPARONI////step11_median//Gasparoni_linear_df.rds"
res_withInfo_df <- readRDS(file)
dim(res_withInfo_df)
## [1] 39865 8
res_withInfo_df
dir(path = data.dir,recursive = T, pattern = ".rda|.csv|.RDS")
## [1] "step10_residuals/Gasparoni_QNBMIQ_PCfiltered_mvalResiduals.RDS"
## [2] "step10_residuals/Gasparoni_residuals_cometh_input_ls.RDS"
## [3] "step2_read_minfi/Gasparoni.rda"
## [4] "step2_read_minfi/GSE66351_pheno.csv"
## [5] "step3_bsConvFilter/RGSet_bsfiltered.rda"
## [6] "step4_clinical_available_filtering/gasparoni_bs_and_clinical_filtered.rda"
## [7] "step5_probesQC_filtering/beta_CG_XY_SNPfiltered_mat.rda"
## [8] "step6_normalization/GASPARONI_QNBMIQ.rda"
## [9] "step6_normalization/GASPARONI_QNBMIQ.RDS"
## [10] "step7_pca_filtering/GASPARONI_PCs_usingBetas.csv"
## [11] "step7_pca_filtering/Gasparoni_QNBMIQ_PCfiltered.RDS"
## [12] "step7_pca_filtering/pheno_df.RDS"
## [13] "step8_neuron_comp/pheno_withNeuronProp_df.RDS"
## [14] "step9_single_cpg_pval/Gasparoni_single_cpg_pVal_df.csv"
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R Under development (unstable) (2020-02-25 r77857)
## os macOS Catalina 10.15.3
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-04-30
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib
## acepack 1.4.1 2016-10-29 [1]
## affy 1.65.1 2019-11-06 [1]
## affyio 1.57.0 2019-11-06 [1]
## annotate 1.65.1 2020-01-27 [1]
## AnnotationDbi * 1.49.1 2020-01-25 [1]
## AnnotationFilter 1.11.0 2019-11-06 [1]
## AnnotationHub * 2.19.12 2020-04-21 [1]
## askpass 1.1 2019-01-13 [1]
## assertthat 0.2.1 2019-03-21 [1]
## backports 1.1.6 2020-04-05 [1]
## base64 2.0 2016-05-10 [1]
## base64enc 0.1-3 2015-07-28 [1]
## beanplot 1.2 2014-09-19 [1]
## Biobase * 2.47.3 2020-03-16 [1]
## BiocFileCache * 1.11.6 2020-04-16 [1]
## BiocGenerics * 0.33.3 2020-03-23 [1]
## BiocManager 1.30.10 2019-11-16 [1]
## BiocParallel 1.21.3 2020-04-21 [1]
## BiocVersion 3.11.1 2019-11-13 [1]
## biomaRt 2.43.6 2020-04-21 [1]
## Biostrings * 2.55.7 2020-03-24 [1]
## biovizBase 1.35.1 2019-12-03 [1]
## bit 1.1-15.2 2020-02-10 [1]
## bit64 0.9-7 2017-05-08 [1]
## bitops 1.0-6 2013-08-17 [1]
## blob 1.2.1 2020-01-20 [1]
## BSgenome 1.55.4 2020-03-19 [1]
## bsseq 1.23.2 2020-04-06 [1]
## bumphunter * 1.29.0 2019-11-07 [1]
## callr 3.4.3 2020-03-28 [1]
## cellranger 1.1.0 2016-07-27 [1]
## checkmate 2.0.0 2020-02-06 [1]
## cli 2.0.2 2020-02-28 [1]
## cluster * 2.1.0 2019-06-19 [1]
## codetools 0.2-16 2018-12-24 [1]
## colorspace 1.4-1 2019-03-18 [1]
## crayon 1.3.4 2017-09-16 [1]
## crosstalk 1.1.0.1 2020-03-13 [1]
## curl 4.3 2019-12-02 [1]
## data.table 1.12.9 2020-02-26 [1]
## DBI 1.1.0 2019-12-15 [1]
## dbplyr * 1.4.3 2020-04-19 [1]
## DelayedArray * 0.13.12 2020-04-10 [1]
## DelayedMatrixStats 1.9.1 2020-03-30 [1]
## desc 1.2.0 2018-05-01 [1]
## devtools 2.3.0 2020-04-10 [1]
## dichromat 2.0-0 2013-01-24 [1]
## digest 0.6.25 2020-02-23 [1]
## DMRcate * 2.1.9 2020-03-28 [1]
## DMRcatedata * 2.5.0 2019-10-31 [1]
## DNAcopy 1.61.0 2019-11-06 [1]
## doRNG 1.8.2 2020-01-27 [1]
## dplyr * 0.8.99.9002 2020-04-02 [1]
## DSS 2.35.1 2020-04-14 [1]
## DT 0.13 2020-03-23 [1]
## edgeR 3.29.1 2020-02-26 [1]
## ellipsis 0.3.0 2019-09-20 [1]
## ensembldb 2.11.4 2020-04-17 [1]
## evaluate 0.14 2019-05-28 [1]
## ExperimentHub * 1.13.8 2020-04-21 [1]
## fansi 0.4.1 2020-01-08 [1]
## fastmap 1.0.1 2019-10-08 [1]
## FDb.InfiniumMethylation.hg19 * 2.2.0 2020-04-09 [1]
## foreach * 1.5.0 2020-03-30 [1]
## foreign 0.8-79 2020-04-26 [1]
## Formula 1.2-3 2018-05-03 [1]
## fs 1.4.1 2020-04-04 [1]
## genefilter 1.69.0 2019-11-06 [1]
## generics 0.0.2 2018-11-29 [1]
## GenomeInfoDb * 1.23.17 2020-04-13 [1]
## GenomeInfoDbData 1.2.3 2020-04-20 [1]
## GenomicAlignments 1.23.2 2020-03-24 [1]
## GenomicFeatures * 1.39.7 2020-03-19 [1]
## GenomicRanges * 1.39.3 2020-04-08 [1]
## GEOquery 2.55.2 2020-04-23 [1]
## ggplot2 * 3.3.0 2020-03-05 [1]
## glue 1.4.0 2020-04-03 [1]
## gridExtra 2.3 2017-09-09 [1]
## gtable 0.3.0 2019-03-25 [1]
## gtools 3.8.2 2020-03-31 [1]
## Gviz 1.31.13 2020-04-21 [1]
## HDF5Array 1.15.18 2020-04-10 [1]
## Hmisc 4.4-0 2020-03-23 [1]
## hms 0.5.3 2020-01-08 [1]
## htmlTable 1.13.3 2019-12-04 [1]
## htmltools 0.4.0 2019-10-04 [1]
## htmlwidgets 1.5.1 2019-10-08 [1]
## httpuv 1.5.2 2019-09-11 [1]
## httr 1.4.1 2019-08-05 [1]
## IlluminaHumanMethylation450kanno.ilmn12.hg19 * 0.6.0 2020-03-24 [1]
## IlluminaHumanMethylation450kmanifest * 0.4.0 2020-04-09 [1]
## IlluminaHumanMethylationEPICanno.ilm10b4.hg19 0.6.0 2020-04-09 [1]
## illuminaio * 0.29.0 2019-11-06 [1]
## interactiveDisplayBase 1.25.0 2019-11-06 [1]
## IRanges * 2.21.8 2020-03-25 [1]
## iterators * 1.0.12 2019-07-26 [1]
## jpeg 0.1-8.1 2019-10-24 [1]
## jsonlite 1.6.1 2020-02-02 [1]
## KernSmooth 2.23-17 2020-04-26 [1]
## knitr 1.28 2020-02-06 [1]
## later 1.0.0 2019-10-04 [1]
## lattice 0.20-41 2020-04-02 [1]
## latticeExtra 0.6-29 2019-12-19 [1]
## lazyeval 0.2.2 2019-03-15 [1]
## lifecycle 0.2.0 2020-03-06 [1]
## limma * 3.43.8 2020-04-14 [1]
## locfit * 1.5-9.4 2020-03-25 [1]
## lumi * 2.39.3 2020-03-27 [1]
## magrittr 1.5 2014-11-22 [1]
## MASS 7.3-51.6 2020-04-26 [1]
## Matrix 1.2-18 2019-11-27 [1]
## matrixStats * 0.56.0 2020-03-13 [1]
## mclust 5.4.6 2020-04-11 [1]
## memoise 1.1.0 2017-04-21 [1]
## methylumi * 2.33.0 2019-11-06 [1]
## mgcv 1.8-31 2019-11-09 [1]
## mime 0.9 2020-02-04 [1]
## minfi * 1.33.1 2020-03-05 [1]
## missMethyl 1.21.4 2020-01-28 [1]
## multtest 2.43.1 2020-03-12 [1]
## munsell 0.5.0 2018-06-12 [1]
## nleqslv 3.3.2 2018-05-17 [1]
## nlme 3.1-147 2020-04-13 [1]
## nnet 7.3-14 2020-04-26 [1]
## nor1mix 1.3-0 2019-06-13 [1]
## openssl 1.4.1 2019-07-18 [1]
## org.Hs.eg.db * 3.10.0 2020-03-30 [1]
## permute 0.9-5 2019-03-12 [1]
## pillar 1.4.3 2019-12-20 [1]
## pkgbuild 1.0.7 2020-04-25 [1]
## pkgconfig 2.0.3 2019-09-22 [1]
## pkgload 1.0.2 2018-10-29 [1]
## plyr 1.8.6 2020-03-03 [1]
## png 0.1-7 2013-12-03 [1]
## preprocessCore 1.49.2 2020-02-01 [1]
## prettyunits 1.1.1 2020-01-24 [1]
## processx 3.4.2 2020-02-09 [1]
## progress 1.2.2 2019-05-16 [1]
## promises 1.1.0 2019-10-04 [1]
## ProtGenerics 1.19.3 2019-12-25 [1]
## ps 1.3.2 2020-02-13 [1]
## purrr 0.3.4 2020-04-17 [1]
## quadprog 1.5-8 2019-11-20 [1]
## R.methodsS3 1.8.0 2020-02-14 [1]
## R.oo 1.23.0 2019-11-03 [1]
## R.utils 2.9.2 2019-12-08 [1]
## R6 2.4.1 2019-11-12 [1]
## randomForest 4.6-14 2018-03-25 [1]
## rappdirs 0.3.1 2016-03-28 [1]
## RColorBrewer 1.1-2 2014-12-07 [1]
## Rcpp 1.0.4.6 2020-04-09 [1]
## RCurl 1.98-1.2 2020-04-18 [1]
## readr 1.3.1 2018-12-21 [1]
## readxl 1.3.1 2019-03-13 [1]
## remotes 2.1.1 2020-02-15 [1]
## reshape 0.8.8 2018-10-23 [1]
## reshape2 * 1.4.4 2020-04-09 [1]
## rhdf5 2.31.10 2020-04-02 [1]
## Rhdf5lib 1.9.3 2020-04-15 [1]
## rlang 0.4.5.9000 2020-03-20 [1]
## rmarkdown 2.1 2020-01-20 [1]
## rngtools 1.5 2020-01-23 [1]
## ROC * 1.63.0 2019-11-06 [1]
## rpart 4.1-15 2019-04-12 [1]
## RPMM * 1.25 2017-02-28 [1]
## rprojroot 1.3-2 2018-01-03 [1]
## Rsamtools 2.3.7 2020-03-18 [1]
## RSQLite 2.2.0 2020-01-07 [1]
## rstudioapi 0.11 2020-02-07 [1]
## rtracklayer 1.47.0 2019-11-06 [1]
## S4Vectors * 0.25.15 2020-04-04 [1]
## scales * 1.1.0 2019-11-18 [1]
## scrime 1.3.5 2018-12-01 [1]
## sesame * 1.5.3 2020-03-03 [1]
## sesameData * 1.5.0 2019-10-31 [1]
## sessioninfo 1.1.1 2018-11-05 [1]
## shiny 1.4.0.2 2020-03-13 [1]
## siggenes 1.61.0 2019-11-06 [1]
## statmod 1.4.34 2020-02-17 [1]
## stringi 1.4.6 2020-02-17 [1]
## stringr 1.4.0 2019-02-10 [1]
## SummarizedExperiment * 1.17.5 2020-03-27 [1]
## survival 3.1-12 2020-04-10 [1]
## testthat 2.3.2 2020-03-02 [1]
## tibble 3.0.1 2020-04-20 [1]
## tidyr 1.0.2 2020-01-24 [1]
## tidyselect 1.0.0 2020-01-27 [1]
## TxDb.Hsapiens.UCSC.hg19.knownGene * 3.2.2 2020-04-02 [1]
## usethis 1.6.0 2020-04-09 [1]
## VariantAnnotation 1.33.5 2020-04-23 [1]
## vctrs 0.2.99.9010 2020-04-02 [1]
## wateRmelon * 1.31.0 2019-11-06 [1]
## wheatmap 0.1.0 2018-03-15 [1]
## withr 2.2.0 2020-04-20 [1]
## xfun 0.13 2020-04-13 [1]
## XML 3.99-0.3 2020-01-20 [1]
## xml2 1.3.2 2020-04-23 [1]
## xtable 1.8-4 2019-04-21 [1]
## XVector * 0.27.2 2020-03-24 [1]
## yaml 2.2.1 2020-02-01 [1]
## zlibbioc 1.33.1 2020-01-24 [1]
## source
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## local
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## Github (tidyverse/dplyr@affb977)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Github (Bioconductor/GenomicRanges@70e6e69)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## Github (r-lib/rlang@a90b04b)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
## Github (r-lib/vctrs@fd24927)
## Bioconductor
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## CRAN (R 4.0.0)
## Bioconductor
## CRAN (R 4.0.0)
## Bioconductor
##
## [1] /Library/Frameworks/R.framework/Versions/4.0/Resources/library