library(minfi)
library(dplyr)
cohort <- "ROSMAP"
data.dir <- file.path("DATASETS/",cohort,"/")
data.dir.table <- "DATASETS/Summary_Table/"
data.dir.raw <- file.path(data.dir,"/step1_download/")
data.dir.raw.idat <- file.path(data.dir.raw, "AllIdat")
data.dir.raw.metadata <- file.path(data.dir.raw, "Metadata")
data.dir.read <- file.path(data.dir,"/step2_read_minfi/")
data.dir.bsfilter <- file.path(data.dir,"/step3_bsConvFilter/")
data.dir.clinical.filter <- file.path(data.dir,"/step4_clinical_available_filtering/")
data.dir.probes.qc <- file.path(data.dir,"/step5_probesQC_filtering/")
data.dir.probes.normalization <- file.path(data.dir,"/step6_normalization/")
data.dir.pca <- file.path(data.dir,"/step7_pca_filtering/")
data.dir.neuron <- file.path(data.dir,"/step8_neuron_comp/")
data.dir.single.cpg.pval <- file.path(data.dir,"/step9_single_cpg_pval/")
data.dir.residuals <- file.path(data.dir,"/step10_residuals/")
data.dir.median <- file.path(data.dir,"/step11_median/")
for(p in grep("dir",ls(),value = T)) dir.create(get(p),recursive = TRUE,showWarnings = FALSE)
Required R libraries: synapserutils and synapser can be installed as following:
install.packages("synapserutils", repos = c("http://ran.synapse.org", "http://cran.fhcrc.org"))
install.packages("synapser", repos = c("http://ran.synapse.org", "http://cran.fhcrc.org"))
library(synapser)
library(synapserutils)
# Please login to synapse using
# synLogin()
#-----------------------------------------------
# Metadata
#-----------------------------------------------
# ROSMAP_arrayMethylation_covariates.tsv
syn3157322 = syncFromSynapse(entity = 'syn3157322',
ifcollision = "keep.local",
path = data.dir.raw.metadata)
#-----------------------------------------------
# DNA methylation
#-----------------------------------------------
# ROSMAP_arrayMethylation_imputed.tsv.gz
syn3168763 <- syncFromSynapse('syn3168763',
ifcollision = "keep.local",
path = data.dir.raw)
# ROSMAP_arrayMethylation_metaData.tsv
syn3168775 = syncFromSynapse(entity = 'syn3168775',
ifcollision = "keep.local",
path = data.dir.raw)
# ROSMAP_arrayMethylation_raw.gz
syn5850422 = syncFromSynapse(entity = 'syn5850422',
ifcollision = "keep.local",
path = data.dir.raw)
syn7357283 <- syncFromSynapse(entity = 'syn7357283',
ifcollision = "keep.local",
path = data.dir.raw.idat)
Description:
Input:
Output:
RGSet <- read.metharray.exp(
base = file.path(data.dir.raw, "AllIdat"),
recursive = TRUE,
verbose = TRUE
)
saveRDS(RGSet, paste0(data.dir.read, "RGSet.RDS"))
#dnam.imputed <-
# readr::read_tsv(file.path(dir.dnam, "ROSMAP_arrayMethylation_imputed.tsv.gz"))
probes.metadata <- readr::read_tsv(file.path(data.dir.raw, "ROSMAP_arrayMethylation_metaData.tsv"))
phenoData <- readr::read_tsv(file.path(data.dir.raw, "ROSMAP_arrayMethylation_covariates.tsv"))
phenoData$Sentrix <- paste0(phenoData$Sentrix_ID, "_", phenoData$Sentrix_Position)
phenoData$Slide <- phenoData$Sentrix_ID
##### 5. BMIQ ##################################################################
library(wateRmelon)
library(RPMM)
library(sesame)
library(sesameData)
RGSet <- readRDS(file = paste0(data.dir.read, "RGSet.RDS"))
bs <- data.frame(bisulfiteConversion = bscon(RGSet))
bsFilteredOut <- row.names(bs)[bs$bisulfiteConversion < 88]
nb.samples <- ncol(RGSet)
RGSet <- RGSet[,!colnames(RGSet) %in% bsFilteredOut]
nb.samples.bc.filtered <- ncol(RGSet)
save(RGSet,
nb.samples,
bs,
nb.samples.bc.filtered,
file = paste0(data.dir.bsfilter, "RGSet_bsfiltered.rda"))
betaSet <- getBeta(RGSet)
identical(colnames(betaSet),phenoData$Sentrix) ##TRUE
colnames(betaSet) <- phenoData$Sample
save(betaSet,
RGSet,
phenoData,
file = paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw737ind.rda"))
load(paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw737ind.rda"))
nb.samples.with.slide <- ncol(betaSet)
nb.probes <- nrow(betaSet)
#--------------------------------------------------
# Read metadata
#--------------------------------------------------
clinical <- readr::read_csv(
file.path(data.dir.raw.metadata,"ROSMAP_Clinical_2019-05_v3.csv"),
col_types = readr::cols())
# change 90+ to 90
clinical$age_death <- as.numeric(gsub("\\+","",clinical$age_death))
biospecimen <- readr::read_csv(
file.path(data.dir.raw.metadata,"ROSMAP_biospecimen_metadata.csv"),
col_types = readr::cols()
)
idkey <- readr::read_csv(
file.path(data.dir.raw.metadata,"ROSMAP_IDkey.csv"),
col_types = readr::cols()
)
#--------------------------------------------------
# keep only one entries with DNA methylation
idkey <- idkey[idkey$mwas_id %in% colnames(betaSet),]
# keep only one entry for each DNA methylation file
idkey <- idkey[!duplicated(idkey$mwas_id),]
# keep samples with only Methylation, braaksc
clinical <- unique(merge (clinical, idkey, by = "projid"))
clinical <- clinical[!is.na(clinical$mwas_id),]
clinical <- clinical[!is.na(clinical$braaksc),]
# merge phenoData with clinical data
dim(phenoData) #743 10
## [1] 737 9
# data in both clinical & pheno data
phenoData <- merge(phenoData, clinical, by.x = "Sample", by.y = "mwas_id")
phenoData <- unique(subset(phenoData, select = Sample:cogdx))
betaSet <- betaSet[,colnames(betaSet) %in% phenoData$Sample]
phenoData <- phenoData[match(colnames(betaSet),phenoData$Sample),]
RGSet <- RGSet[,colnames(RGSet) %in% phenoData$Sentrix]
RGSet <- RGSet[,match(phenoData$Sentrix,colnames(RGSet))]
identical(colnames(betaSet),phenoData$Sample)
## [1] TRUE
identical(colnames(RGSet),phenoData$Sentrix)
## [1] TRUE
save(betaSet,
phenoData,
RGSet,
file = paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw734ind.rda"))
load(paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw734ind.rda"))
nb.samples.with.clinical <- ncol(betaSet)
dim(phenoData)
## [1] 734 25
dim(betaSet)
## [1] 485512 734
detP <- detectionP(RGSet, type = "m+u")
failed.01 <- detP > 0.01
passedProbes <- rownames(failed.01)[rowMeans(failed.01) == 0]
sum(rowMeans(failed.01) == 0)
betaSet <- betaSet[passedProbes, ]
dim(betaSet)
nb.probes.detectP <- nrow(betaSet)
###### (b) keep only probes that start with "cg"
betaSet <- subset (betaSet, substr(row.names(betaSet),1,2) == "cg")
dim(betaSet)
nb.probes.detectP.cg <- nrow(betaSet)
##### 2. drop probes that are on X/Y ###########################################
##### 3. drop probes where SNP with MAF >= 0.01 in the last 5 bp of the probe ##
library(DMRcate)
betaSet <- rmSNPandCH(object = betaSet,
dist = 5,
mafcut = 0.01,
and = TRUE,
rmcrosshyb = FALSE,
rmXY = TRUE)
nb.probes.cg.dmrcate <- nrow(betaSet)
dim(betaSet)
## [1] 453782 734
####### Save File
save(betaSet,
nb.probes.detectP.cg,
nb.probes.detectP,
nb.probes.cg.dmrcate,
phenoData,
file = paste0(data.dir.probes.qc, "/betas_CG_XY_SNPfiltered.rda"))
load(paste0(data.dir.probes.qc, "/betas_CG_XY_SNPfiltered.rda"))
dim(phenoData)
## [1] 734 25
dim(betaSet)
## [1] 431803 734
Input:
Output:
##### 5. BMIQ ##################################################################
library(wateRmelon)
library(RPMM)
library(sesame)
library(sesameData)
library(lumi)
betaQN <- lumiN(x.lumi = betaSet, method = "quantile")
### Order annotation in the same order as beta matrix
annotType <- sesameDataGet("HM450.hg19.manifest")
annotType$designTypeNumeric <- ifelse(annotType$designType == "I",1,2)
### Density plot for type I and type II probes
library(sm)
betaQNCompleteCol1 <- betaQN[complete.cases(betaQN[,1]), ]
annotTypeCompleteCol1 <- annotType[row.names(betaQNCompleteCol1), ]
sm.density.compare(
betaQNCompleteCol1[,1],
annotTypeCompleteCol1$designTypeNumeric
)
type12 <- annotType$designTypeNumeric[match(rownames(betaQN),names(annotType))]
### BMIQ
set.seed (946)
betaQN_BMIQ <- apply(
betaQN, 2,
function(x){
norm_ls <- BMIQ(x, design.v = type12, plots = FALSE)
return (norm_ls$nbeta)
}
)
saveRDS(betaQN_BMIQ, file.path(data.dir.probes.normalization,"ROSMAP_QNBMIQ.rds"))
saveRDS(phenoData, file.path(data.dir.probes.normalization,"pheno.rds"))
betaQN_BMIQ <- readRDS( file.path(data.dir.probes.normalization,"ROSMAP_QNBMIQ.rds"))
dim(betaQN_BMIQ)
## [1] 431803 734
phenoData <- readRDS(file.path(data.dir.probes.normalization,"pheno.rds"))
dim(phenoData)
## [1] 734 25
Description:
Input:
Output:
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
## Sourcing https://gist.githubusercontent.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e/raw/a14424662da343c1301b7b2f03210d28d16ae05c/functions.R
## SHA-1 hash of file is ef6f39dc4e5eddb5ca1c6e5af321e75ff06e9362
beta_mat <- readRDS( file.path(data.dir.probes.normalization,"ROSMAP_QNBMIQ.rds"))
dim(beta_mat)
## [1] 431803 734
phenoData <- readRDS(file.path(data.dir.probes.normalization,"pheno.rds"))
dim(phenoData)
## [1] 734 25
identical(colnames(beta_mat), as.character(phenoData$Sample))
## [1] TRUE
# transform to m values
mvalue_mat <- log2(beta_mat / (1 - beta_mat))
phenoData$braaksc <- as.numeric(as.character(phenoData$braaksc))
phenoData$stage3 <- phenoData$braaksc
phenoData$stage3[phenoData$braaksc <= 2] <- '0-2'
phenoData$stage3[phenoData$braaksc > 2 & phenoData$braaksc < 5] <- '3-4'
phenoData$stage3[phenoData$braaksc >= 5] <- '5-6'
phenoData$sex <- phenoData$msex
phenoData$sex[phenoData$msex == 1] <- 'Male'
phenoData$sex[phenoData$msex == 0] <- 'Female'
phenoData$batch <- as.factor(phenoData$batch)
phenoData$apoe_genotype <- as.factor(phenoData$apoe_genotype)
phenoData$race <- as.factor(phenoData$race)
phenoData$spanish <- as.factor(phenoData$spanish)
## 2. first order matrix by most variable probes on top
betaOrd_mat <- OrderDataBySd(beta_mat)
mOrd_mat <- OrderDataBySd(mvalue_mat)
betaOrd_matPlot <- betaOrd_mat[, as.character(phenoData$Sample)]
mOrd_matPlot <- mOrd_mat[, as.character(phenoData$Sample)]
identical(as.character(phenoData$Sample), colnames(betaOrd_matPlot))
## [1] TRUE
identical(as.character(phenoData$Sample), colnames(mOrd_matPlot))
## [1] TRUE
expSorted_mat = betaOrd_mat
pca <- prcomp(
t(expSorted_mat[1:50000,]),
scale = TRUE
)
d <- data.frame(PC1 = pca$x[,1], PC2 = pca$x[,2])
meanPC1 <- mean(d$PC1)
sdPC1 <- sd(d$PC1)
meanPC2 <- mean(d$PC2)
sdPC2 <- sd(d$PC2)
out3sdPC1_1 <- meanPC1 - 3 * sdPC1
out3sdPC1_2 <- meanPC1 + 3 * sdPC1
out3sdPC2_1 <- meanPC2 - 3 * sdPC2
out3sdPC2_2 <- meanPC2 + 3 * sdPC2
d$outlier_PC1[d$PC1 >= out3sdPC1_1 & d$PC1 <= out3sdPC1_2] <- 0
d$outlier_PC1[d$PC1 < out3sdPC1_1 | d$PC1 > out3sdPC1_2] <- 1
d$outlier_PC2[d$PC2 >= out3sdPC2_1 & d$PC2 <= out3sdPC2_2] <- 0
d$outlier_PC2[d$PC2 < out3sdPC2_1 | d$PC2 > out3sdPC2_2] <- 1
write.csv(d, file.path(data.dir.pca,"ROSMAP_PCs_usingBetas.csv"))
### 2. pca plots
library(ggplot2)
library(ggrepel)
phenoData$sample <- phenoData$Sample
noOutliers <- d[which(d$outlier_PC1 == 0 & d$outlier_PC2 == 0), ]
betaQN_BMIQ_PCfiltered <- beta_mat[, rownames(noOutliers)]
dim(betaQN_BMIQ_PCfiltered)
saveRDS(betaQN_BMIQ_PCfiltered, file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS"))
phenoData <- phenoData [phenoData$Sample %in% rownames(noOutliers) ,]
dim(phenoData)
saveRDS(phenoData, file.path(data.dir.pca,"pheno_PCfiltered.RDS"))
betaQN_BMIQ_PCfiltered <- readRDS(file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS"))
nb.samples.with.clinical.after.pca <- ncol(betaQN_BMIQ_PCfiltered)
dim(betaQN_BMIQ_PCfiltered)
## [1] 431803 726
phenoData <- readRDS(file.path(data.dir.pca,"pheno_PCfiltered.RDS"))
dim(phenoData)
## [1] 726 27
df.samples <- data.frame("Number of samples" = c(nb.samples,
nb.samples.bc.filtered,
nb.samples.with.slide,
nb.samples.with.clinical,
nb.samples.with.clinical.after.pca),
"Description" = c("total number of samples",
"samples with bisulfate conversion > 88",
"samples with slide information",
"samples with clinical data",
"Samples after PCA"),
"Difference" = c("-",
nb.samples.bc.filtered - nb.samples ,
nb.samples.with.slide - nb.samples.bc.filtered ,
nb.samples.with.clinical - nb.samples.with.slide ,
nb.samples.with.clinical.after.pca - nb.samples.with.clinical)
)
df.samples
# Create summary table
df.probes <- data.frame("Number of probes" = c(nb.probes,
nb.probes.detectP,
nb.probes.detectP.cg,
nb.probes.cg.dmrcate),
"Description" = c("total number of probes in raw data",
"detection P < 0.01",
"only probes that start with cg",
"DMRcate"),
"Difference" = c("-",
nb.probes.detectP - nb.probes ,
nb.probes.detectP.cg - nb.probes.detectP,
nb.probes.cg.dmrcate - nb.probes.detectP.cg)
)
df.probes
save(df.samples,df.probes,file = file.path(data.dir.table, "ROSMAP_table.rda"))
Data from https://www.tandfonline.com/doi/full/10.4161/epi.23924
objects <- load("../../CET/CETS_Image.RData")
objects
idx <- list(
controlNeuron = pdBrain$celltype == "N" & pdBrain$diag == "Control" & pdBrain$ethnicity == "Caucasian",
controlGlia = pdBrain$celltype == "G" & pdBrain$diag == "Control" & pdBrain$ethnicity == "Caucasian"
)
refProfile <- getReference(brain, idx)
head(refProfile)
##### 2. Estimate proportions of neurons in PFC samples ########################
### Limit to 10,000 cpgs in the refProfile dataset
pfc <- readRDS(file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS"))
selected <- rownames(pfc) %in% rownames(refProfile)
table(selected)
pfc.refcpgs <- pfc[selected, ]
### Estimate proportion of neurons
prop <- data.frame(estProportion(pfc.refcpgs, profile = refProfile))
colnames(prop) <- "prop.neuron"
##### 3. Merge pfc.refcpgs with phenotype file #################################
pheno <- readRDS(file.path(data.dir.pca,"pheno_PCfiltered.RDS"))
pheno_final <- merge(
pheno,
prop,
by.x = "Sample",
by.y = "row.names"
)
saveRDS(pheno_final, paste0(data.dir.neuron, "pheno_PFC_withNeuronProp_df.RDS"))
beta_mat <- readRDS(file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS"))
pheno_df <- readRDS(paste0(data.dir.neuron, "pheno_PFC_withNeuronProp_df.RDS"))
### Compute M values
mval_mat <- log2(beta_mat / (1 - beta_mat))
pheno_df <- pheno_df[match(colnames(mval_mat),pheno_df$Sample),]
identical(pheno_df$Sample, colnames(mval_mat))
## [1] TRUE
pheno_df$age.brain <- pheno_df$age_death
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$Slide <- as.factor(pheno_df$Slide) ### ???
pheno_df$batch <- as.factor(pheno_df$batch)
# If rosmap cohort, don't forget batch effect
is(pheno_df$braaksc,"numeric")
## [1] TRUE
is(pheno_df$age.brain,"numeric")
## [1] TRUE
is(pheno_df$prop.neuron,"numeric")
## [1] TRUE
#str(pheno_df)
predictors_char <- "braaksc"
covariates_char <- c("age.brain", "sex", "prop.neuron", "Slide", "batch")
doParallel::registerDoParallel(cores = 20)
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
results_ordered_df <- plyr::adply(mval_mat,1, function(row){
sumOneRegion_df <- as.data.frame(t(row))
result <- TestSingleRegion(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
sumOneRegion_df = sumOneRegion_df
)
result
}, .progress = "time",.parallel = TRUE,.id = "cpg")
colnames(results_ordered_df)[1] <- "cpg"
identical(row.names(mval_mat), results_ordered_df$cpg %>% as.character())
results_ordered_df$fdr <- p.adjust(
results_ordered_df$pValue,
method = "fdr"
)
write.csv(
results_ordered_df,
paste0(data.dir.single.cpg.pval, "ROSMAP_PFC_single_cpg_pVal_df.csv"),
row.names = FALSE
)
results_ordered_df <- readr::read_csv(paste0(data.dir.single.cpg.pval, "ROSMAP_PFC_single_cpg_pVal_df.csv"))
## Parsed with column specification:
## cols(
## cpg = col_character(),
## Estimate = col_double(),
## StdErr = col_double(),
## pValue = col_double(),
## fdr = col_double()
## )
results_ordered_df
Input:
Output:
##### 1. Import datasets #######################################################
beta_mat <- readRDS(grep("QNBMIQ",dir(data.dir.pca,full.names = T),ignore.case = T,value = T))
pheno_df <- readRDS(dir(data.dir.neuron,full.names = T))
### Compute M values
mvalue_mat <- log2(beta_mat / (1 - beta_mat))
### Reorder samples based on pheno_df
mvalue_mat <- mvalue_mat[, pheno_df$Sample]
identical(colnames(mvalue_mat), pheno_df$Sample)
### Take residuals
lmF <- function(mval){
fitE <- lm(
as.numeric(mval) ~ age_death + msex + prop.neuron + as.character(Slide) + batch, #add batch if rosmap
data = pheno_df,
na.action = na.exclude
)
residuals (fitE)
}
doParallel::registerDoParallel(cores = 4)
resid <- plyr::adply(mvalue_mat,1,.fun = lmF,.progress = "time",.parallel = TRUE)
rownames(resid) <- resid[,1]
resid[,1] <- NULL
colnames(resid) <- colnames(mvalue_mat)
dim(resid)
dim(mvalue_mat)
### Save dataset
saveRDS(
resid,
paste0(data.dir.residuals,
"ROSMAP_QNBMIQ_PCfiltered_mvalResiduals.RDS"
)
)
### Import datasets
mvalue_residuals_mat <- readRDS(
paste0(data.dir.residuals,
"ROSMAP_QNBMIQ_PCfiltered_mvalResiduals.RDS"
)
)
### Call in functions
library(coMethDMR)
library(BiocParallel)
probes.cluster.all <- coMethDMR::getPredefinedCluster(arrayType = "450k",
clusterType = "regions")
### Find co-methylated clusters
ncores <- 6
coMeth_ls <- CoMethAllRegions(
dnam = mvalue_residuals_mat,
betaToM = FALSE,
CpGs_ls = probes.cluster.all,
arrayType = "450k",
rDropThresh_num = 0.4,
minPairwiseCorr = NULL,
method = "spearman",
returnAllCpGs = TRUE,
output = "all",
nCores_int = ncores,
progressbar = FALSE
)
saveRDS(
coMeth_ls,
paste0(data.dir.residuals,
"ROSMAP_residuals_cometh_input_ls.RDS")
)
Input:
Output:
### Import datasets
beta_mat <- beta_mat <- readRDS(dir(data.dir.pca, pattern = "QNBMIQ", full.names = TRUE))
pheno_df <- readRDS(dir(data.dir.neuron,full.names = T))
mval_mat <- log2(beta_mat / (1 - beta_mat)) %>% as.matrix()
coMeth_ls <- readRDS(
paste0(data.dir.residuals,
"ROSMAP_residuals_cometh_input_ls.RDS"
)
)
### Create info dataset
input_cometh <- data.frame(
inputRegion = coMeth_ls$inputRegion_chr,
nCoMethRegion = coMeth_ls$nCoMethRegions_num,
coMethRegion = names(coMeth_ls$coMeth_ls),
nCpGs = unlist(lapply(coMeth_ls$coMeth_ls, length), use.names = FALSE),
stringsAsFactors = FALSE
)
input_cometh_nodup <- input_cometh[
!duplicated(input_cometh$coMethRegion),
]
colnames(input_cometh_nodup) <- c(
paste0(cohort, "_inputRegion"),
paste0(cohort, "_nCoMethRegion"),
paste0(cohort, "_coMethRegion"),
paste0(cohort, "_nCpGs")
)
saveRDS(
input_cometh_nodup,
paste0(data.dir.median, cohort, "_info_df.rds")
)
### Take median of probes in each cluster for each sample
filename <- paste0(data.dir.median, cohort, "_mediansMval_df.rds")
library(robustbase)
mval_mat <- mval_mat[rownames(mval_mat) %in% unlist(coMeth_ls$coMeth_ls),]
if(!file.exists(filename)){
medianMval.df <- plyr::ldply(
coMeth_ls$coMeth_ls[!duplicated(names(coMeth_ls$coMeth_ls))],
function(probes){
colMedians(mval_mat[as.character(probes),], na.rm = TRUE)
},
.progress = "time"
)
medianMval.df$.id <- NULL
colnames(medianMval.df) <- colnames(mval_mat)
saveRDS(medianMval.df, file = filename)
} else {
medianMval.df <- readRDS(filename)
}
### Import datasets
info_df <- readRDS(
paste0(data.dir.median, cohort, "_info_df.rds")
)
mediansMval_df <- readRDS(
paste0(data.dir.median, cohort, "_mediansMval_df.rds")
)
pheno_df <- readRDS(dir(data.dir.neuron,full.names = T))
### Check variables before fitting model
pheno_df$Sample <- pheno_df$Sample_Group
identical(pheno_df$Sample, colnames(mediansMval_df))
## [1] FALSE
#str(pheno_df)
pheno_df$msex <- as.factor(pheno_df$msex)
pheno_df$Slide <- as.factor(pheno_df$Slide)
pheno_df$age.brain <- pheno_df$age_death
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$batch <- as.factor(pheno_df$batch)
# If rosmap cohort, don't forget batch effect
is(pheno_df$braaksc,"numeric")
## [1] TRUE
is(pheno_df$age.brain,"numeric")
## [1] TRUE
is(pheno_df$prop.neuron,"numeric")
## [1] TRUE
#str(pheno_df)
predictors_char <- "braaksc"
covariates_char <- c("age.brain", "msex", "prop.neuron", "Slide", "batch")
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
res_df <- TestAllRegions_noInfo(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
summarizedRegions_df = mediansMval_df,
cores = 1
)
colnames(res_df) <- c(
paste0(cohort, "_estimate"),
paste0(cohort, "_se"),
paste0(cohort, "_pVal"),
paste0(cohort, "_fdr")
)
res_withInfo_df <- cbind(info_df, res_df)
saveRDS(
res_withInfo_df,
paste0(data.dir.median, cohort, "_linear_df.rds")
)
file <- dir(data.dir.median,pattern = paste0(".*linear_df"),
recursive = T,
full.names = TRUE,
ignore.case = T)
file
## [1] "DATASETS//ROSMAP////step11_median//ROSMAP_linear_df.rds"
res_withInfo_df <- readRDS(file)
dim(res_withInfo_df)
## [1] 38679 8
res_withInfo_df
dir(path = data.dir,recursive = T,pattern = ".rda|.csv|.RDS")
## [1] "DNAm/ROSMAPbetaMatrixraw735ind.rda"
## [2] "DNAm/ROSMAPbetaMatrixraw743ind.rda"
## [3] "step1_download/AllIdat/phenoData.RDS"
## [4] "step1_download/Metadata/Bile Acid data dictionary.csv"
## [5] "step1_download/Metadata/ROSMAP_arraymiRNA_covariates.csv"
## [6] "step1_download/Metadata/ROSMAP_assay_proteomicsTMTquantitation_metadata.csv"
## [7] "step1_download/Metadata/ROSMAP_assay_RNAseq_metadata.csv"
## [8] "step1_download/Metadata/ROSMAP_assay_scRNAseq_metadata.csv"
## [9] "step1_download/Metadata/ROSMAP_assay_snpArray_metadata.csv"
## [10] "step1_download/Metadata/ROSMAP_assay_wholeGenomeSeq_metadata.csv"
## [11] "step1_download/Metadata/ROSMAP_biospecimen_metadata.csv"
## [12] "step1_download/Metadata/ROSMAP_Brain_p180_Data_Dictionary.csv"
## [13] "step1_download/Metadata/ROSMAP_ChIPseq_covariates.csv"
## [14] "step1_download/Metadata/ROSMAP_ChIPseq_metaData.csv"
## [15] "step1_download/Metadata/ROSMAP_Clinical_2019-05_v3.csv"
## [16] "step1_download/Metadata/ROSMAP_IDkey.csv"
## [17] "step10_residuals/ROSMAP_QNBMIQ_PCfiltered_mvalResiduals.RDS"
## [18] "step10_residuals/ROSMAP_residuals_cometh_input_ls.RDS"
## [19] "step2_read_minfi/RGSet.RDS"
## [20] "step3_bsConvFilter/RGSet_bsfiltered.rda"
## [21] "step4_clinical_available_filtering/ROSMAPbetaMatrixraw734ind.rda"
## [22] "step4_clinical_available_filtering/ROSMAPbetaMatrixraw737ind.rda"
## [23] "step5_probesQC_filtering/betas_CG_XY_SNPfiltered.rda"
## [24] "step6_normalization/quantro.rda"
## [25] "step7_pca_filtering/pheno_PCfiltered.RDS"
## [26] "step7_pca_filtering/ROSMAP_PCs_usingBetas.csv"
## [27] "step7_pca_filtering/ROSMAP_QNBMIQ_PCfiltered.RDS"
## [28] "step8_neuron_comp/pheno_PFC_withNeuronProp_df.RDS"
## [29] "step9_single_cpg_pval/ROSMAP_PFC_single_cpg_pVal_df.csv"
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
## setting value
## version R version 3.6.1 (2019-07-05)
## os Ubuntu 19.10
## system x86_64, linux-gnu
## ui X11
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/New_York
## date 2020-04-27
##
## ─ Packages ───────────────────────────────────────────────────────────────────
## package * version date lib
## acepack 1.4.1 2016-10-29 [2]
## affy 1.64.0 2019-10-29 [2]
## affyio 1.56.0 2019-10-29 [2]
## annotate 1.64.0 2019-10-29 [2]
## AnnotationDbi * 1.48.0 2019-10-29 [2]
## AnnotationFilter 1.10.0 2019-10-29 [2]
## AnnotationHub * 2.18.0 2019-10-29 [2]
## askpass 1.1 2019-01-13 [2]
## assertthat 0.2.1 2019-03-21 [2]
## backports 1.1.6 2020-04-05 [2]
## base64 2.0 2016-05-10 [2]
## base64enc 0.1-3 2015-07-28 [2]
## beanplot 1.2 2014-09-19 [2]
## Biobase * 2.46.0 2019-10-29 [2]
## BiocFileCache * 1.10.2 2019-11-08 [2]
## BiocGenerics * 0.32.0 2019-10-29 [2]
## BiocManager 1.30.10 2019-11-16 [2]
## BiocParallel * 1.20.1 2019-12-21 [2]
## BiocVersion 3.10.1 2019-06-06 [2]
## biomaRt 2.42.1 2020-03-26 [2]
## Biostrings * 2.54.0 2019-10-29 [2]
## biovizBase 1.34.1 2019-12-04 [2]
## bit 1.1-15.2 2020-02-10 [2]
## bit64 0.9-7 2017-05-08 [2]
## bitops 1.0-6 2013-08-17 [2]
## blob 1.2.1 2020-01-20 [2]
## BSgenome 1.54.0 2019-10-29 [2]
## bsseq 1.22.0 2019-10-29 [2]
## bumphunter * 1.28.0 2019-10-29 [2]
## callr 3.4.3 2020-03-28 [2]
## cellranger 1.1.0 2016-07-27 [2]
## checkmate 2.0.0 2020-02-06 [2]
## cli 2.0.2 2020-02-28 [2]
## cluster * 2.1.0 2019-06-19 [4]
## codetools 0.2-16 2018-12-24 [4]
## colorspace 1.4-1 2019-03-18 [2]
## crayon 1.3.4 2017-09-16 [2]
## curl 4.3 2019-12-02 [2]
## data.table 1.12.8 2019-12-09 [2]
## DBI 1.1.0 2019-12-15 [2]
## dbplyr * 1.4.2 2019-06-17 [2]
## DelayedArray * 0.12.3 2020-04-09 [2]
## DelayedMatrixStats 1.8.0 2019-10-29 [2]
## desc 1.2.0 2018-05-01 [2]
## devtools 2.3.0 2020-04-10 [2]
## dichromat 2.0-0 2013-01-24 [2]
## digest 0.6.25 2020-02-23 [2]
## DMRcate * 2.0.7 2020-01-10 [2]
## DMRcatedata * 2.2.1 2020-02-27 [1]
## DNAcopy 1.60.0 2019-10-29 [2]
## doRNG 1.8.2 2020-01-27 [2]
## dplyr * 0.8.5 2020-03-07 [2]
## DSS 2.34.0 2019-10-29 [2]
## edgeR 3.28.1 2020-02-26 [2]
## ellipsis 0.3.0 2019-09-20 [2]
## ensembldb 2.10.2 2019-11-20 [2]
## evaluate 0.14 2019-05-28 [2]
## ExperimentHub * 1.12.0 2019-10-29 [2]
## fansi 0.4.1 2020-01-08 [2]
## fastmap 1.0.1 2019-10-08 [2]
## FDb.InfiniumMethylation.hg19 * 2.2.0 2019-12-10 [2]
## foreach * 1.5.0 2020-03-30 [2]
## foreign 0.8-76 2020-03-03 [4]
## Formula 1.2-3 2018-05-03 [2]
## fs 1.4.1 2020-04-04 [2]
## genefilter 1.68.0 2019-10-29 [2]
## GenomeInfoDb * 1.22.1 2020-03-27 [2]
## GenomeInfoDbData 1.2.2 2019-12-02 [2]
## GenomicAlignments 1.22.1 2019-11-12 [2]
## GenomicFeatures * 1.38.2 2020-02-15 [2]
## GenomicRanges * 1.38.0 2019-10-29 [2]
## GEOquery 2.54.1 2019-11-18 [2]
## ggplot2 * 3.3.0 2020-03-05 [2]
## ggrepel * 0.8.2 2020-03-08 [2]
## glue 1.4.0 2020-04-03 [2]
## gridExtra 2.3 2017-09-09 [2]
## gtable 0.3.0 2019-03-25 [2]
## gtools 3.8.2 2020-03-31 [2]
## Gviz 1.30.3 2020-02-17 [2]
## HDF5Array 1.14.3 2020-02-28 [2]
## Hmisc 4.4-0 2020-03-23 [2]
## hms 0.5.3 2020-01-08 [2]
## htmlTable 1.13.3 2019-12-04 [2]
## htmltools 0.4.0 2019-10-04 [2]
## htmlwidgets 1.5.1 2019-10-08 [2]
## httpuv 1.5.2 2019-09-11 [2]
## httr 1.4.1 2019-08-05 [2]
## IlluminaHumanMethylation450kanno.ilmn12.hg19 * 0.6.0 2019-11-08 [2]
## IlluminaHumanMethylationEPICanno.ilm10b4.hg19 0.6.0 2020-01-09 [2]
## illuminaio * 0.28.0 2019-10-29 [2]
## interactiveDisplayBase 1.24.0 2019-10-29 [2]
## IRanges * 2.20.2 2020-01-13 [2]
## iterators * 1.0.12 2019-07-26 [2]
## jpeg 0.1-8.1 2019-10-24 [2]
## jsonlite 1.6.1 2020-02-02 [2]
## KernSmooth 2.23-16 2019-10-15 [4]
## knitr 1.28 2020-02-06 [2]
## later 1.0.0 2019-10-04 [2]
## lattice 0.20-41 2020-04-02 [4]
## latticeExtra 0.6-29 2019-12-19 [2]
## lazyeval 0.2.2 2019-03-15 [2]
## lifecycle 0.2.0 2020-03-06 [2]
## limma * 3.42.2 2020-02-03 [2]
## locfit * 1.5-9.4 2020-03-25 [2]
## lumi * 2.38.0 2019-10-29 [2]
## magrittr 1.5 2014-11-22 [2]
## MASS 7.3-51.5 2019-12-20 [4]
## Matrix 1.2-18 2019-11-27 [2]
## matrixStats * 0.56.0 2020-03-13 [2]
## mclust 5.4.6 2020-04-11 [2]
## memoise 1.1.0 2017-04-21 [2]
## methylumi * 2.32.0 2019-10-29 [2]
## mgcv 1.8-31 2019-11-09 [4]
## mime 0.9 2020-02-04 [2]
## minfi * 1.32.0 2019-10-29 [2]
## missMethyl 1.21.4 2020-02-03 [2]
## multtest 2.42.0 2019-10-29 [2]
## munsell 0.5.0 2018-06-12 [2]
## nleqslv 3.3.2 2018-05-17 [2]
## nlme 3.1-147 2020-04-13 [4]
## nnet 7.3-13 2020-02-25 [4]
## nor1mix 1.3-0 2019-06-13 [2]
## openssl 1.4.1 2019-07-18 [2]
## org.Hs.eg.db * 3.10.0 2019-12-02 [2]
## permute 0.9-5 2019-03-12 [2]
## pillar 1.4.3 2019-12-20 [2]
## pkgbuild 1.0.6 2019-10-09 [2]
## pkgconfig 2.0.3 2019-09-22 [2]
## pkgload 1.0.2 2018-10-29 [2]
## plyr 1.8.6 2020-03-03 [2]
## png 0.1-7 2013-12-03 [2]
## preprocessCore 1.48.0 2019-10-29 [2]
## prettyunits 1.1.1 2020-01-24 [2]
## processx 3.4.2 2020-02-09 [2]
## progress 1.2.2 2019-05-16 [2]
## promises 1.1.0 2019-10-04 [2]
## ProtGenerics 1.18.0 2019-10-29 [2]
## ps 1.3.2 2020-02-13 [2]
## purrr 0.3.3 2019-10-18 [2]
## quadprog 1.5-8 2019-11-20 [2]
## R.methodsS3 1.8.0 2020-02-14 [2]
## R.oo 1.23.0 2019-11-03 [2]
## R.utils 2.9.2 2019-12-08 [2]
## R6 2.4.1 2019-11-12 [2]
## randomForest 4.6-14 2018-03-25 [2]
## rappdirs 0.3.1 2016-03-28 [2]
## RColorBrewer 1.1-2 2014-12-07 [2]
## Rcpp 1.0.4.6 2020-04-09 [2]
## RCurl 1.98-1.1 2020-01-19 [2]
## readr 1.3.1 2018-12-21 [2]
## readxl 1.3.1 2019-03-13 [2]
## remotes 2.1.1 2020-02-15 [2]
## reshape 0.8.8 2018-10-23 [2]
## reshape2 * 1.4.4 2020-04-09 [2]
## rhdf5 2.30.1 2019-11-26 [2]
## Rhdf5lib 1.8.0 2019-10-29 [2]
## rlang 0.4.5 2020-03-01 [2]
## rmarkdown 2.1 2020-01-20 [2]
## rngtools 1.5 2020-01-23 [2]
## ROC * 1.62.0 2019-10-29 [2]
## rpart 4.1-15 2019-04-12 [4]
## RPMM * 1.25 2017-02-28 [2]
## rprojroot 1.3-2 2018-01-03 [2]
## Rsamtools 2.2.3 2020-02-23 [2]
## RSQLite 2.2.0 2020-01-07 [2]
## rstudioapi 0.11 2020-02-07 [2]
## rtracklayer 1.46.0 2019-10-29 [2]
## S4Vectors * 0.24.4 2020-04-09 [2]
## scales * 1.1.0 2019-11-18 [2]
## scrime 1.3.5 2018-12-01 [2]
## sesame * 1.4.0 2019-10-29 [2]
## sesameData * 1.4.0 2019-11-05 [2]
## sessioninfo 1.1.1 2018-11-05 [2]
## shiny 1.4.0.2 2020-03-13 [2]
## siggenes 1.60.0 2019-10-29 [2]
## statmod 1.4.34 2020-02-17 [2]
## stringi 1.4.6 2020-02-17 [2]
## stringr 1.4.0 2019-02-10 [2]
## SummarizedExperiment * 1.16.1 2019-12-19 [2]
## survival 3.1-12 2020-04-10 [4]
## testthat 2.3.2 2020-03-02 [2]
## tibble 3.0.0 2020-03-30 [2]
## tidyr 1.0.2 2020-01-24 [2]
## tidyselect 1.0.0 2020-01-27 [2]
## TxDb.Hsapiens.UCSC.hg19.knownGene * 3.2.2 2019-12-10 [2]
## usethis 1.6.0 2020-04-09 [2]
## VariantAnnotation 1.32.0 2019-10-29 [2]
## vctrs 0.2.4 2020-03-10 [2]
## wateRmelon * 1.30.0 2019-10-29 [2]
## wheatmap 0.1.0 2018-03-15 [2]
## withr 2.1.2 2018-03-15 [2]
## xfun 0.13 2020-04-13 [2]
## XML 3.99-0.3 2020-01-20 [2]
## xml2 1.3.1 2020-04-09 [2]
## xtable 1.8-4 2019-04-21 [2]
## XVector * 0.26.0 2019-10-29 [2]
## yaml 2.2.1 2020-02-01 [2]
## zlibbioc 1.32.0 2019-10-29 [2]
## source
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.0)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Github (Oshlack/missMethyl@5792fbc)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## CRAN (R 3.6.1)
## Bioconductor
## CRAN (R 3.6.1)
## Bioconductor
##
## [1] /home/tiagochst/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library