1 Data retrival

library(minfi)
library(dplyr)

cohort <- "ROSMAP"
data.dir <- file.path("DATASETS/",cohort,"/") 
data.dir.table <- "DATASETS/Summary_Table/" 
data.dir.raw <- file.path(data.dir,"/step1_download/") 
data.dir.raw.idat <- file.path(data.dir.raw, "AllIdat")
data.dir.raw.metadata <- file.path(data.dir.raw, "Metadata")
data.dir.read <- file.path(data.dir,"/step2_read_minfi/") 
data.dir.bsfilter <- file.path(data.dir,"/step3_bsConvFilter/") 
data.dir.clinical.filter <- file.path(data.dir,"/step4_clinical_available_filtering/") 
data.dir.probes.qc <- file.path(data.dir,"/step5_probesQC_filtering/") 
data.dir.probes.normalization <- file.path(data.dir,"/step6_normalization/") 
data.dir.pca <- file.path(data.dir,"/step7_pca_filtering/") 
data.dir.neuron <- file.path(data.dir,"/step8_neuron_comp/") 
data.dir.single.cpg.pval <- file.path(data.dir,"/step9_single_cpg_pval/") 
data.dir.residuals <- file.path(data.dir,"/step10_residuals/") 
data.dir.median <- file.path(data.dir,"/step11_median/") 
for(p in grep("dir",ls(),value = T)) dir.create(get(p),recursive = TRUE,showWarnings = FALSE)

1.1 Download data

Required R libraries: synapserutils and synapser can be installed as following:

install.packages("synapserutils", repos = c("http://ran.synapse.org", "http://cran.fhcrc.org"))
install.packages("synapser", repos = c("http://ran.synapse.org", "http://cran.fhcrc.org"))

library(synapser)
library(synapserutils)

# Please login to synapse using
# synLogin()

#-----------------------------------------------
# Metadata
#-----------------------------------------------
# ROSMAP_arrayMethylation_covariates.tsv
syn3157322 = syncFromSynapse(entity = 'syn3157322',
                             ifcollision = "keep.local",
                             path = data.dir.raw.metadata)

#-----------------------------------------------
# DNA methylation
#-----------------------------------------------

# ROSMAP_arrayMethylation_imputed.tsv.gz
syn3168763 <- syncFromSynapse('syn3168763',
                               ifcollision = "keep.local",
                               path = data.dir.raw)

# ROSMAP_arrayMethylation_metaData.tsv
syn3168775 = syncFromSynapse(entity = 'syn3168775',
                             ifcollision = "keep.local",
                             path =  data.dir.raw)

# ROSMAP_arrayMethylation_raw.gz
syn5850422 = syncFromSynapse(entity = 'syn5850422',
                             ifcollision = "keep.local",
                             path = data.dir.raw)

syn7357283 <- syncFromSynapse(entity = 'syn7357283', 
                              ifcollision = "keep.local",
                              path = data.dir.raw.idat)

2 Data Pre-processing

Description:

Read in idat files
Remove duplicated samples

Input:

idat files

Output:

RGSet.RDS
phenoData
BetaMatrixRaw

RGSet <- read.metharray.exp(
  base = file.path(data.dir.raw, "AllIdat"),
  recursive = TRUE,
  verbose = TRUE
)
saveRDS(RGSet, paste0(data.dir.read, "RGSet.RDS"))
#dnam.imputed <-
#  readr::read_tsv(file.path(dir.dnam, "ROSMAP_arrayMethylation_imputed.tsv.gz"))
probes.metadata <- readr::read_tsv(file.path(data.dir.raw, "ROSMAP_arrayMethylation_metaData.tsv"))
phenoData <- readr::read_tsv(file.path(data.dir.raw, "ROSMAP_arrayMethylation_covariates.tsv"))
phenoData$Sentrix <- paste0(phenoData$Sentrix_ID, "_", phenoData$Sentrix_Position)
phenoData$Slide <- phenoData$Sentrix_ID

##### 5. BMIQ ##################################################################
library(wateRmelon)
library(RPMM)
library(sesame)
library(sesameData)

RGSet <- readRDS(file = paste0(data.dir.read, "RGSet.RDS"))
bs <- data.frame(bisulfiteConversion = bscon(RGSet))
bsFilteredOut <- row.names(bs)[bs$bisulfiteConversion < 88]
nb.samples <- ncol(RGSet)
RGSet <- RGSet[,!colnames(RGSet) %in% bsFilteredOut]
nb.samples.bc.filtered <-  ncol(RGSet)

save(RGSet,
     nb.samples,
     bs,
     nb.samples.bc.filtered, 
     file = paste0(data.dir.bsfilter, "RGSet_bsfiltered.rda"))

betaSet <- getBeta(RGSet)
identical(colnames(betaSet),phenoData$Sentrix) ##TRUE
colnames(betaSet) <- phenoData$Sample
save(betaSet,
     RGSet,
     phenoData,
     file =  paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw737ind.rda"))

3 Sample subset

load(paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw737ind.rda"))
nb.samples.with.slide <- ncol(betaSet)
nb.probes <- nrow(betaSet)

#--------------------------------------------------
# Read metadata
#--------------------------------------------------
clinical <- readr::read_csv(
  file.path(data.dir.raw.metadata,"ROSMAP_Clinical_2019-05_v3.csv"),
  col_types = readr::cols())

# change 90+ to 90
clinical$age_death <- as.numeric(gsub("\\+","",clinical$age_death))

biospecimen <- readr::read_csv(
  file.path(data.dir.raw.metadata,"ROSMAP_biospecimen_metadata.csv"),
  col_types = readr::cols()
  )
idkey <- readr::read_csv(
  file.path(data.dir.raw.metadata,"ROSMAP_IDkey.csv"),
  col_types = readr::cols()
  )
#--------------------------------------------------

# keep only one entries with DNA methylation
idkey <- idkey[idkey$mwas_id %in% colnames(betaSet),]

# keep only one entry for each DNA methylation file 
idkey <- idkey[!duplicated(idkey$mwas_id),]

# keep samples with only Methylation, braaksc 
clinical <- unique(merge (clinical, idkey, by = "projid"))
clinical <- clinical[!is.na(clinical$mwas_id),]
clinical <- clinical[!is.na(clinical$braaksc),]
# merge phenoData with clinical data
dim(phenoData) #743  10

## [1] 737   9

# data in both clinical & pheno data
phenoData <- merge(phenoData, clinical, by.x = "Sample", by.y = "mwas_id")
phenoData <- unique(subset(phenoData, select = Sample:cogdx))

betaSet <- betaSet[,colnames(betaSet) %in% phenoData$Sample]
phenoData <- phenoData[match(colnames(betaSet),phenoData$Sample),]
RGSet <- RGSet[,colnames(RGSet) %in% phenoData$Sentrix]
RGSet <- RGSet[,match(phenoData$Sentrix,colnames(RGSet))]

identical(colnames(betaSet),phenoData$Sample)

## [1] TRUE

identical(colnames(RGSet),phenoData$Sentrix)

## [1] TRUE

save(betaSet,
     phenoData,
     RGSet,
     file =  paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw734ind.rda"))

load(paste0(data.dir.clinical.filter, "/ROSMAPbetaMatrixraw734ind.rda"))
nb.samples.with.clinical <- ncol(betaSet)
dim(phenoData)

## [1] 734  25

dim(betaSet)

## [1] 485512    734

detP <- detectionP(RGSet, type = "m+u")
failed.01 <- detP > 0.01
passedProbes <- rownames(failed.01)[rowMeans(failed.01) == 0]
sum(rowMeans(failed.01) == 0)  

betaSet <- betaSet[passedProbes, ]

dim(betaSet)
nb.probes.detectP <- nrow(betaSet)

###### (b) keep only probes that start with "cg"
betaSet <- subset (betaSet, substr(row.names(betaSet),1,2) == "cg")
dim(betaSet)
nb.probes.detectP.cg <- nrow(betaSet)

##### 2. drop probes that are on X/Y ###########################################
##### 3. drop probes where SNP with MAF >= 0.01 in the last 5 bp of the probe ##
library(DMRcate)

betaSet <- rmSNPandCH(object = betaSet,
                      dist = 5, 
                      mafcut = 0.01, 
                      and = TRUE,
                      rmcrosshyb = FALSE,
                      rmXY = TRUE)
nb.probes.cg.dmrcate <- nrow(betaSet)
dim(betaSet)

## [1] 453782    734

####### Save File
save(betaSet,
     nb.probes.detectP.cg,
     nb.probes.detectP,
     nb.probes.cg.dmrcate,
     phenoData,
     file =  paste0(data.dir.probes.qc, "/betas_CG_XY_SNPfiltered.rda"))

load(paste0(data.dir.probes.qc, "/betas_CG_XY_SNPfiltered.rda"))

dim(phenoData)

## [1] 734  25

dim(betaSet)

## [1] 431803    734

4 Normalization

Quantile normalization and BMIQ normalization

Input:

beta_CG_XY_SNPfiltered_mat.RDS
RGSet.RDS
pheno_df.RDS
full.annot.RDS

Output:

bs.csv
pheno_df.RDS
QNBMIQ.RDS

##### 5. BMIQ ##################################################################
library(wateRmelon)
library(RPMM)
library(sesame)
library(sesameData)

4.1 Quantile normalization

library(lumi)
betaQN <- lumiN(x.lumi = betaSet, method = "quantile")

### Order annotation in the same order as beta matrix
annotType <- sesameDataGet("HM450.hg19.manifest")
annotType$designTypeNumeric <- ifelse(annotType$designType == "I",1,2)

### Density plot for type I and type II probes
library(sm)

betaQNCompleteCol1 <- betaQN[complete.cases(betaQN[,1]), ]
annotTypeCompleteCol1 <- annotType[row.names(betaQNCompleteCol1), ]

sm.density.compare(
  betaQNCompleteCol1[,1],
  annotTypeCompleteCol1$designTypeNumeric
)

type12 <- annotType$designTypeNumeric[match(rownames(betaQN),names(annotType))]

### BMIQ
set.seed (946)
betaQN_BMIQ <- apply(
  betaQN, 2,
  function(x){
    norm_ls <- BMIQ(x, design.v = type12, plots = FALSE)
    return (norm_ls$nbeta)
  }
)
saveRDS(betaQN_BMIQ, file.path(data.dir.probes.normalization,"ROSMAP_QNBMIQ.rds"))
saveRDS(phenoData, file.path(data.dir.probes.normalization,"pheno.rds"))

betaQN_BMIQ <- readRDS( file.path(data.dir.probes.normalization,"ROSMAP_QNBMIQ.rds"))
dim(betaQN_BMIQ)

## [1] 431803    734

phenoData <- readRDS(file.path(data.dir.probes.normalization,"pheno.rds"))
dim(phenoData)

## [1] 734  25

5 Outliers detection - PCA analysis

Description:

estimate standard deviation for each probe
select most variable probes (e.g. n = 50,000)
pca analysis
Filter outliers

Input:

QNBMIQ.rds
pheno_df.RDS

Output:

PCs_usingBetas.csv,
PCA plots
QNBMIQ_PCfiltered.RDS
pheno_df.RDS

devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")

## Sourcing https://gist.githubusercontent.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e/raw/a14424662da343c1301b7b2f03210d28d16ae05c/functions.R

## SHA-1 hash of file is ef6f39dc4e5eddb5ca1c6e5af321e75ff06e9362

beta_mat <- readRDS( file.path(data.dir.probes.normalization,"ROSMAP_QNBMIQ.rds"))
dim(beta_mat)

## [1] 431803    734

phenoData <- readRDS(file.path(data.dir.probes.normalization,"pheno.rds"))
dim(phenoData)

## [1] 734  25

identical(colnames(beta_mat), as.character(phenoData$Sample))

## [1] TRUE

# transform to m values
mvalue_mat <- log2(beta_mat / (1 - beta_mat))

phenoData$braaksc <- as.numeric(as.character(phenoData$braaksc))

phenoData$stage3 <- phenoData$braaksc
phenoData$stage3[phenoData$braaksc <= 2] <- '0-2'
phenoData$stage3[phenoData$braaksc > 2 & phenoData$braaksc < 5] <- '3-4'
phenoData$stage3[phenoData$braaksc >= 5] <- '5-6'

phenoData$sex <- phenoData$msex
phenoData$sex[phenoData$msex == 1] <- 'Male'
phenoData$sex[phenoData$msex == 0] <- 'Female'

phenoData$batch <- as.factor(phenoData$batch)
phenoData$apoe_genotype <- as.factor(phenoData$apoe_genotype)
phenoData$race <- as.factor(phenoData$race)
phenoData$spanish <- as.factor(phenoData$spanish)


## 2. first order matrix by most variable probes on top

betaOrd_mat <- OrderDataBySd(beta_mat)

mOrd_mat <- OrderDataBySd(mvalue_mat)

betaOrd_matPlot <- betaOrd_mat[, as.character(phenoData$Sample)]
mOrd_matPlot <- mOrd_mat[, as.character(phenoData$Sample)]
identical(as.character(phenoData$Sample), colnames(betaOrd_matPlot))

## [1] TRUE

identical(as.character(phenoData$Sample), colnames(mOrd_matPlot))

## [1] TRUE

expSorted_mat = betaOrd_mat

pca <- prcomp(
  t(expSorted_mat[1:50000,]),
  scale = TRUE
)

d <- data.frame(PC1 = pca$x[,1], PC2 = pca$x[,2])

meanPC1 <- mean(d$PC1)
sdPC1   <- sd(d$PC1)

meanPC2 <- mean(d$PC2)
sdPC2   <- sd(d$PC2)


out3sdPC1_1 <- meanPC1 - 3 * sdPC1
out3sdPC1_2 <- meanPC1 + 3 * sdPC1

out3sdPC2_1 <- meanPC2 - 3 * sdPC2
out3sdPC2_2 <- meanPC2 + 3 * sdPC2

d$outlier_PC1[d$PC1 >= out3sdPC1_1 & d$PC1 <= out3sdPC1_2] <- 0
d$outlier_PC1[d$PC1 < out3sdPC1_1 | d$PC1 > out3sdPC1_2] <- 1

d$outlier_PC2[d$PC2 >= out3sdPC2_1 & d$PC2 <= out3sdPC2_2] <- 0
d$outlier_PC2[d$PC2 < out3sdPC2_1 | d$PC2 > out3sdPC2_2] <- 1

write.csv(d, file.path(data.dir.pca,"ROSMAP_PCs_usingBetas.csv"))

### 2. pca plots
library(ggplot2)
library(ggrepel)
phenoData$sample <- phenoData$Sample

5.1 Filter samples by PCA, SAVE files

noOutliers <- d[which(d$outlier_PC1 == 0 & d$outlier_PC2 == 0), ]
betaQN_BMIQ_PCfiltered <- beta_mat[, rownames(noOutliers)]
dim(betaQN_BMIQ_PCfiltered)

saveRDS(betaQN_BMIQ_PCfiltered, file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS"))

phenoData <- phenoData [phenoData$Sample %in% rownames(noOutliers) ,]
dim(phenoData)
saveRDS(phenoData, file.path(data.dir.pca,"pheno_PCfiltered.RDS"))

6 Summary after QC steps

6.1 Data and metadata

betaQN_BMIQ_PCfiltered <- readRDS(file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS")) 
nb.samples.with.clinical.after.pca <- ncol(betaQN_BMIQ_PCfiltered)
dim(betaQN_BMIQ_PCfiltered)

## [1] 431803    726

phenoData <- readRDS(file.path(data.dir.pca,"pheno_PCfiltered.RDS")) 
dim(phenoData)

## [1] 726  27

6.2 Numbers of samples and probes removed in each step

df.samples <- data.frame("Number of samples" =  c(nb.samples, 
                                                  nb.samples.bc.filtered,
                                                  nb.samples.with.slide,
                                                  nb.samples.with.clinical, 
                                                  nb.samples.with.clinical.after.pca),
                         "Description" = c("total number of samples",
                                           "samples with bisulfate conversion > 88",
                                           "samples with slide information",
                                           "samples with clinical data",
                                           "Samples after PCA"),
                         "Difference" = c("-",
                                          nb.samples.bc.filtered - nb.samples ,
                                          nb.samples.with.slide - nb.samples.bc.filtered ,
                                          nb.samples.with.clinical - nb.samples.with.slide ,
                                          nb.samples.with.clinical.after.pca - nb.samples.with.clinical)
)    
df.samples

# Create summary table
df.probes <- data.frame("Number of probes" = c(nb.probes,
                                               nb.probes.detectP, 
                                               nb.probes.detectP.cg,
                                               nb.probes.cg.dmrcate),
                        "Description" = c("total number of probes in raw data",
                                          "detection P < 0.01",
                                          "only probes that start with cg",
                                          "DMRcate"),
                        "Difference" = c("-",
                                         nb.probes.detectP - nb.probes ,
                                         nb.probes.detectP.cg - nb.probes.detectP,
                                         nb.probes.cg.dmrcate - nb.probes.detectP.cg)
)
df.probes

save(df.samples,df.probes,file = file.path(data.dir.table, "ROSMAP_table.rda"))

7 Compute neuron proportion

Data from https://www.tandfonline.com/doi/full/10.4161/epi.23924

Input: London_PFC_QNBMIQ_PCfiltered.RDS, pheno107_PFC_df.RDS
Output: pheno107_PFC_withNeuronProp_df.RDS

objects <- load("../../CET/CETS_Image.RData")
objects

7.1 Get reference profile from Caucasions + controls

idx <- list(
  controlNeuron = pdBrain$celltype == "N" & pdBrain$diag == "Control" & pdBrain$ethnicity == "Caucasian",
  controlGlia   = pdBrain$celltype == "G" & pdBrain$diag == "Control" & pdBrain$ethnicity == "Caucasian"
)

refProfile <- getReference(brain, idx)
head(refProfile)


##### 2. Estimate proportions of neurons in PFC samples ########################

### Limit to 10,000 cpgs in the refProfile dataset
pfc <- readRDS(file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS")) 

selected <- rownames(pfc) %in% rownames(refProfile)
table(selected)

pfc.refcpgs <- pfc[selected, ] 

### Estimate proportion of neurons
prop <- data.frame(estProportion(pfc.refcpgs, profile = refProfile))
colnames(prop) <- "prop.neuron"


##### 3. Merge pfc.refcpgs with phenotype file #################################
pheno <- readRDS(file.path(data.dir.pca,"pheno_PCfiltered.RDS"))

pheno_final <- merge(
  pheno,
  prop,
  by.x = "Sample",
  by.y = "row.names"
)

saveRDS(pheno_final, paste0(data.dir.neuron, "pheno_PFC_withNeuronProp_df.RDS"))

8 Linear regression by cpgs Methylation

8.1 Import datasets

beta_mat <- readRDS(file.path(data.dir.pca,"ROSMAP_QNBMIQ_PCfiltered.RDS")) 
pheno_df <- readRDS(paste0(data.dir.neuron, "pheno_PFC_withNeuronProp_df.RDS"))

8.2 Test all regions

### Compute M values
mval_mat <- log2(beta_mat / (1 - beta_mat))

pheno_df <- pheno_df[match(colnames(mval_mat),pheno_df$Sample),]
identical(pheno_df$Sample, colnames(mval_mat))

## [1] TRUE

pheno_df$age.brain <- pheno_df$age_death
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$Slide <- as.factor(pheno_df$Slide) ### ???
pheno_df$batch <- as.factor(pheno_df$batch)
# If rosmap cohort, don't forget batch effect

is(pheno_df$braaksc,"numeric")

## [1] TRUE

is(pheno_df$age.brain,"numeric")

## [1] TRUE

is(pheno_df$prop.neuron,"numeric")

## [1] TRUE

#str(pheno_df)

predictors_char <- "braaksc"
covariates_char <- c("age.brain", "sex", "prop.neuron", "Slide", "batch")
doParallel::registerDoParallel(cores = 20)
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")

results_ordered_df <- plyr::adply(mval_mat,1, function(row){
  
  sumOneRegion_df <- as.data.frame(t(row))
  
  result <- TestSingleRegion(
    predictors_char = predictors_char,
    covariates_char = covariates_char,
    pheno_df = pheno_df,
    sumOneRegion_df = sumOneRegion_df
  )
  result
}, .progress = "time",.parallel = TRUE,.id = "cpg")
colnames(results_ordered_df)[1] <- "cpg"


identical(row.names(mval_mat), results_ordered_df$cpg %>% as.character())

results_ordered_df$fdr <- p.adjust(
    results_ordered_df$pValue,
    method = "fdr"
)

write.csv(
  results_ordered_df,
  paste0(data.dir.single.cpg.pval, "ROSMAP_PFC_single_cpg_pVal_df.csv"),
  row.names = FALSE
)

results_ordered_df <- readr::read_csv(paste0(data.dir.single.cpg.pval, "ROSMAP_PFC_single_cpg_pVal_df.csv"))

## Parsed with column specification:
## cols(
##   cpg = col_character(),
##   Estimate = col_double(),
##   StdErr = col_double(),
##   pValue = col_double(),
##   fdr = col_double()
## )

results_ordered_df

9 Linear regression by regions median Methylation

9.1 Residuals control and coMethylated Regions

Take residuals
Find co-methylated regions

Input:

QNBMIQ_PCfiltered
pheno_withNeuronProp_df

Output:

QNBMIQ_PCfiltered_mvalResiduals
residuals_cometh_ls

9.1.1 Take residuals

##### 1. Import datasets #######################################################
beta_mat <- readRDS(grep("QNBMIQ",dir(data.dir.pca,full.names = T),ignore.case = T,value = T)) 
pheno_df <- readRDS(dir(data.dir.neuron,full.names = T)) 

### Compute M values
mvalue_mat <- log2(beta_mat / (1 - beta_mat))

### Reorder samples based on pheno_df
mvalue_mat <- mvalue_mat[, pheno_df$Sample]

identical(colnames(mvalue_mat),  pheno_df$Sample)

### Take residuals
lmF <- function(mval){
  fitE <- lm(
    as.numeric(mval) ~ age_death + msex + prop.neuron + as.character(Slide) + batch, #add batch if rosmap
    data = pheno_df,
    na.action = na.exclude
  )
  residuals (fitE)
}
doParallel::registerDoParallel(cores = 4)
resid <- plyr::adply(mvalue_mat,1,.fun = lmF,.progress = "time",.parallel = TRUE)
rownames(resid) <- resid[,1]
resid[,1] <- NULL
colnames(resid) <- colnames(mvalue_mat)
dim(resid)
dim(mvalue_mat)

### Save dataset
saveRDS(
  resid,
  paste0(data.dir.residuals, 
         "ROSMAP_QNBMIQ_PCfiltered_mvalResiduals.RDS"
  )
)

9.1.2 Find co-methylated regions

### Import datasets
mvalue_residuals_mat <- readRDS(
  paste0(data.dir.residuals, 
         "ROSMAP_QNBMIQ_PCfiltered_mvalResiduals.RDS"
  )
)

### Call in functions
library(coMethDMR)
library(BiocParallel)

probes.cluster.all <- coMethDMR::getPredefinedCluster(arrayType = "450k",
                                                      clusterType = "regions")

### Find co-methylated clusters
ncores <- 6
coMeth_ls <- CoMethAllRegions(
  dnam = mvalue_residuals_mat,      
  betaToM = FALSE,                   
  CpGs_ls = probes.cluster.all,
  arrayType = "450k",
  rDropThresh_num = 0.4,
  minPairwiseCorr = NULL,
  method = "spearman",             
  returnAllCpGs = TRUE,              
  output = "all",
  nCores_int = ncores,
  progressbar = FALSE
)


saveRDS(
  coMeth_ls,
  paste0(data.dir.residuals, 
         "ROSMAP_residuals_cometh_input_ls.RDS")
)

9.2 Linear regression by regions median Methylation

Calculate medians by cluster and sample
linear regression

Input:

QNBMIQ_PCfiltered,
pheno_withNeuronProp_df
residuals_cometh_input_ls

Output:

info_df
mediansMval_df
linear_df

9.2.1 Calculate medians by cluster and sample

### Import datasets
beta_mat <- beta_mat <- readRDS(dir(data.dir.pca, pattern = "QNBMIQ", full.names = TRUE))
pheno_df <- readRDS(dir(data.dir.neuron,full.names = T)) 
mval_mat <- log2(beta_mat / (1 - beta_mat)) %>% as.matrix()
coMeth_ls <- readRDS(
  paste0(data.dir.residuals, 
         "ROSMAP_residuals_cometh_input_ls.RDS"
  )
)

### Create info dataset
input_cometh <- data.frame(
  inputRegion = coMeth_ls$inputRegion_chr,
  nCoMethRegion = coMeth_ls$nCoMethRegions_num,
  coMethRegion = names(coMeth_ls$coMeth_ls),
  nCpGs = unlist(lapply(coMeth_ls$coMeth_ls, length), use.names = FALSE),
  stringsAsFactors = FALSE
)

input_cometh_nodup <- input_cometh[
  !duplicated(input_cometh$coMethRegion),
  ]
colnames(input_cometh_nodup) <- c(
  paste0(cohort, "_inputRegion"),
  paste0(cohort, "_nCoMethRegion"),
  paste0(cohort, "_coMethRegion"),
  paste0(cohort, "_nCpGs")
)

saveRDS(
  input_cometh_nodup,
  paste0(data.dir.median, cohort, "_info_df.rds")
)

### Take median of probes in each cluster for each sample
filename <-  paste0(data.dir.median, cohort, "_mediansMval_df.rds")

library(robustbase)
mval_mat <- mval_mat[rownames(mval_mat) %in% unlist(coMeth_ls$coMeth_ls),]
if(!file.exists(filename)){
  medianMval.df <- plyr::ldply(
    coMeth_ls$coMeth_ls[!duplicated(names(coMeth_ls$coMeth_ls))],
    function(probes){
      colMedians(mval_mat[as.character(probes),], na.rm = TRUE)
    },
    .progress = "time"
  )
  medianMval.df$.id <- NULL
  colnames(medianMval.df) <- colnames(mval_mat)
  saveRDS(medianMval.df, file = filename)
} else {
  medianMval.df <- readRDS(filename)
}

9.2.2 Test all regions – linear regressions

### Import datasets
info_df <- readRDS(
  paste0(data.dir.median, cohort, "_info_df.rds")
)
mediansMval_df <- readRDS(
  paste0(data.dir.median, cohort, "_mediansMval_df.rds")
)
pheno_df <- readRDS(dir(data.dir.neuron,full.names = T)) 

### Check variables before fitting model
pheno_df$Sample <- pheno_df$Sample_Group

identical(pheno_df$Sample, colnames(mediansMval_df))

## [1] FALSE

#str(pheno_df)

pheno_df$msex <- as.factor(pheno_df$msex)
pheno_df$Slide <- as.factor(pheno_df$Slide)
pheno_df$age.brain <- pheno_df$age_death
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$batch <- as.factor(pheno_df$batch)
# If rosmap cohort, don't forget batch effect

is(pheno_df$braaksc,"numeric")

## [1] TRUE

is(pheno_df$age.brain,"numeric")

## [1] TRUE

is(pheno_df$prop.neuron,"numeric")

## [1] TRUE

#str(pheno_df)

predictors_char <- "braaksc"
covariates_char <- c("age.brain", "msex", "prop.neuron", "Slide", "batch")

devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")

res_df <- TestAllRegions_noInfo(
  predictors_char = predictors_char,
  covariates_char = covariates_char,
  pheno_df = pheno_df,
  summarizedRegions_df = mediansMval_df,
  cores = 1 
)

colnames(res_df) <- c(
  paste0(cohort, "_estimate"),
  paste0(cohort, "_se"),
  paste0(cohort, "_pVal"),
  paste0(cohort, "_fdr")
)

res_withInfo_df <- cbind(info_df, res_df)

saveRDS(
  res_withInfo_df,
  paste0(data.dir.median, cohort, "_linear_df.rds")
)

file <- dir(data.dir.median,pattern = paste0(".*linear_df"),
            recursive = T,
            full.names = TRUE,
            ignore.case = T)
file

## [1] "DATASETS//ROSMAP////step11_median//ROSMAP_linear_df.rds"

res_withInfo_df <- readRDS(file)
dim(res_withInfo_df)

## [1] 38679     8

res_withInfo_df

10 Data final

dir(path = data.dir,recursive = T,pattern = ".rda|.csv|.RDS")

##  [1] "DNAm/ROSMAPbetaMatrixraw735ind.rda"                                         
##  [2] "DNAm/ROSMAPbetaMatrixraw743ind.rda"                                         
##  [3] "step1_download/AllIdat/phenoData.RDS"                                       
##  [4] "step1_download/Metadata/Bile Acid data dictionary.csv"                      
##  [5] "step1_download/Metadata/ROSMAP_arraymiRNA_covariates.csv"                   
##  [6] "step1_download/Metadata/ROSMAP_assay_proteomicsTMTquantitation_metadata.csv"
##  [7] "step1_download/Metadata/ROSMAP_assay_RNAseq_metadata.csv"                   
##  [8] "step1_download/Metadata/ROSMAP_assay_scRNAseq_metadata.csv"                 
##  [9] "step1_download/Metadata/ROSMAP_assay_snpArray_metadata.csv"                 
## [10] "step1_download/Metadata/ROSMAP_assay_wholeGenomeSeq_metadata.csv"           
## [11] "step1_download/Metadata/ROSMAP_biospecimen_metadata.csv"                    
## [12] "step1_download/Metadata/ROSMAP_Brain_p180_Data_Dictionary.csv"              
## [13] "step1_download/Metadata/ROSMAP_ChIPseq_covariates.csv"                      
## [14] "step1_download/Metadata/ROSMAP_ChIPseq_metaData.csv"                        
## [15] "step1_download/Metadata/ROSMAP_Clinical_2019-05_v3.csv"                     
## [16] "step1_download/Metadata/ROSMAP_IDkey.csv"                                   
## [17] "step10_residuals/ROSMAP_QNBMIQ_PCfiltered_mvalResiduals.RDS"                
## [18] "step10_residuals/ROSMAP_residuals_cometh_input_ls.RDS"                      
## [19] "step2_read_minfi/RGSet.RDS"                                                 
## [20] "step3_bsConvFilter/RGSet_bsfiltered.rda"                                    
## [21] "step4_clinical_available_filtering/ROSMAPbetaMatrixraw734ind.rda"           
## [22] "step4_clinical_available_filtering/ROSMAPbetaMatrixraw737ind.rda"           
## [23] "step5_probesQC_filtering/betas_CG_XY_SNPfiltered.rda"                       
## [24] "step6_normalization/quantro.rda"                                            
## [25] "step7_pca_filtering/pheno_PCfiltered.RDS"                                   
## [26] "step7_pca_filtering/ROSMAP_PCs_usingBetas.csv"                              
## [27] "step7_pca_filtering/ROSMAP_QNBMIQ_PCfiltered.RDS"                           
## [28] "step8_neuron_comp/pheno_PFC_withNeuronProp_df.RDS"                          
## [29] "step9_single_cpg_pval/ROSMAP_PFC_single_cpg_pVal_df.csv"

11 Session information

devtools::session_info()

## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.6.1 (2019-07-05)
##  os       Ubuntu 19.10                
##  system   x86_64, linux-gnu           
##  ui       X11                         
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/New_York            
##  date     2020-04-27                  
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package                                       * version  date       lib
##  acepack                                         1.4.1    2016-10-29 [2]
##  affy                                            1.64.0   2019-10-29 [2]
##  affyio                                          1.56.0   2019-10-29 [2]
##  annotate                                        1.64.0   2019-10-29 [2]
##  AnnotationDbi                                 * 1.48.0   2019-10-29 [2]
##  AnnotationFilter                                1.10.0   2019-10-29 [2]
##  AnnotationHub                                 * 2.18.0   2019-10-29 [2]
##  askpass                                         1.1      2019-01-13 [2]
##  assertthat                                      0.2.1    2019-03-21 [2]
##  backports                                       1.1.6    2020-04-05 [2]
##  base64                                          2.0      2016-05-10 [2]
##  base64enc                                       0.1-3    2015-07-28 [2]
##  beanplot                                        1.2      2014-09-19 [2]
##  Biobase                                       * 2.46.0   2019-10-29 [2]
##  BiocFileCache                                 * 1.10.2   2019-11-08 [2]
##  BiocGenerics                                  * 0.32.0   2019-10-29 [2]
##  BiocManager                                     1.30.10  2019-11-16 [2]
##  BiocParallel                                  * 1.20.1   2019-12-21 [2]
##  BiocVersion                                     3.10.1   2019-06-06 [2]
##  biomaRt                                         2.42.1   2020-03-26 [2]
##  Biostrings                                    * 2.54.0   2019-10-29 [2]
##  biovizBase                                      1.34.1   2019-12-04 [2]
##  bit                                             1.1-15.2 2020-02-10 [2]
##  bit64                                           0.9-7    2017-05-08 [2]
##  bitops                                          1.0-6    2013-08-17 [2]
##  blob                                            1.2.1    2020-01-20 [2]
##  BSgenome                                        1.54.0   2019-10-29 [2]
##  bsseq                                           1.22.0   2019-10-29 [2]
##  bumphunter                                    * 1.28.0   2019-10-29 [2]
##  callr                                           3.4.3    2020-03-28 [2]
##  cellranger                                      1.1.0    2016-07-27 [2]
##  checkmate                                       2.0.0    2020-02-06 [2]
##  cli                                             2.0.2    2020-02-28 [2]
##  cluster                                       * 2.1.0    2019-06-19 [4]
##  codetools                                       0.2-16   2018-12-24 [4]
##  colorspace                                      1.4-1    2019-03-18 [2]
##  crayon                                          1.3.4    2017-09-16 [2]
##  curl                                            4.3      2019-12-02 [2]
##  data.table                                      1.12.8   2019-12-09 [2]
##  DBI                                             1.1.0    2019-12-15 [2]
##  dbplyr                                        * 1.4.2    2019-06-17 [2]
##  DelayedArray                                  * 0.12.3   2020-04-09 [2]
##  DelayedMatrixStats                              1.8.0    2019-10-29 [2]
##  desc                                            1.2.0    2018-05-01 [2]
##  devtools                                        2.3.0    2020-04-10 [2]
##  dichromat                                       2.0-0    2013-01-24 [2]
##  digest                                          0.6.25   2020-02-23 [2]
##  DMRcate                                       * 2.0.7    2020-01-10 [2]
##  DMRcatedata                                   * 2.2.1    2020-02-27 [1]
##  DNAcopy                                         1.60.0   2019-10-29 [2]
##  doRNG                                           1.8.2    2020-01-27 [2]
##  dplyr                                         * 0.8.5    2020-03-07 [2]
##  DSS                                             2.34.0   2019-10-29 [2]
##  edgeR                                           3.28.1   2020-02-26 [2]
##  ellipsis                                        0.3.0    2019-09-20 [2]
##  ensembldb                                       2.10.2   2019-11-20 [2]
##  evaluate                                        0.14     2019-05-28 [2]
##  ExperimentHub                                 * 1.12.0   2019-10-29 [2]
##  fansi                                           0.4.1    2020-01-08 [2]
##  fastmap                                         1.0.1    2019-10-08 [2]
##  FDb.InfiniumMethylation.hg19                  * 2.2.0    2019-12-10 [2]
##  foreach                                       * 1.5.0    2020-03-30 [2]
##  foreign                                         0.8-76   2020-03-03 [4]
##  Formula                                         1.2-3    2018-05-03 [2]
##  fs                                              1.4.1    2020-04-04 [2]
##  genefilter                                      1.68.0   2019-10-29 [2]
##  GenomeInfoDb                                  * 1.22.1   2020-03-27 [2]
##  GenomeInfoDbData                                1.2.2    2019-12-02 [2]
##  GenomicAlignments                               1.22.1   2019-11-12 [2]
##  GenomicFeatures                               * 1.38.2   2020-02-15 [2]
##  GenomicRanges                                 * 1.38.0   2019-10-29 [2]
##  GEOquery                                        2.54.1   2019-11-18 [2]
##  ggplot2                                       * 3.3.0    2020-03-05 [2]
##  ggrepel                                       * 0.8.2    2020-03-08 [2]
##  glue                                            1.4.0    2020-04-03 [2]
##  gridExtra                                       2.3      2017-09-09 [2]
##  gtable                                          0.3.0    2019-03-25 [2]
##  gtools                                          3.8.2    2020-03-31 [2]
##  Gviz                                            1.30.3   2020-02-17 [2]
##  HDF5Array                                       1.14.3   2020-02-28 [2]
##  Hmisc                                           4.4-0    2020-03-23 [2]
##  hms                                             0.5.3    2020-01-08 [2]
##  htmlTable                                       1.13.3   2019-12-04 [2]
##  htmltools                                       0.4.0    2019-10-04 [2]
##  htmlwidgets                                     1.5.1    2019-10-08 [2]
##  httpuv                                          1.5.2    2019-09-11 [2]
##  httr                                            1.4.1    2019-08-05 [2]
##  IlluminaHumanMethylation450kanno.ilmn12.hg19  * 0.6.0    2019-11-08 [2]
##  IlluminaHumanMethylationEPICanno.ilm10b4.hg19   0.6.0    2020-01-09 [2]
##  illuminaio                                    * 0.28.0   2019-10-29 [2]
##  interactiveDisplayBase                          1.24.0   2019-10-29 [2]
##  IRanges                                       * 2.20.2   2020-01-13 [2]
##  iterators                                     * 1.0.12   2019-07-26 [2]
##  jpeg                                            0.1-8.1  2019-10-24 [2]
##  jsonlite                                        1.6.1    2020-02-02 [2]
##  KernSmooth                                      2.23-16  2019-10-15 [4]
##  knitr                                           1.28     2020-02-06 [2]
##  later                                           1.0.0    2019-10-04 [2]
##  lattice                                         0.20-41  2020-04-02 [4]
##  latticeExtra                                    0.6-29   2019-12-19 [2]
##  lazyeval                                        0.2.2    2019-03-15 [2]
##  lifecycle                                       0.2.0    2020-03-06 [2]
##  limma                                         * 3.42.2   2020-02-03 [2]
##  locfit                                        * 1.5-9.4  2020-03-25 [2]
##  lumi                                          * 2.38.0   2019-10-29 [2]
##  magrittr                                        1.5      2014-11-22 [2]
##  MASS                                            7.3-51.5 2019-12-20 [4]
##  Matrix                                          1.2-18   2019-11-27 [2]
##  matrixStats                                   * 0.56.0   2020-03-13 [2]
##  mclust                                          5.4.6    2020-04-11 [2]
##  memoise                                         1.1.0    2017-04-21 [2]
##  methylumi                                     * 2.32.0   2019-10-29 [2]
##  mgcv                                            1.8-31   2019-11-09 [4]
##  mime                                            0.9      2020-02-04 [2]
##  minfi                                         * 1.32.0   2019-10-29 [2]
##  missMethyl                                      1.21.4   2020-02-03 [2]
##  multtest                                        2.42.0   2019-10-29 [2]
##  munsell                                         0.5.0    2018-06-12 [2]
##  nleqslv                                         3.3.2    2018-05-17 [2]
##  nlme                                            3.1-147  2020-04-13 [4]
##  nnet                                            7.3-13   2020-02-25 [4]
##  nor1mix                                         1.3-0    2019-06-13 [2]
##  openssl                                         1.4.1    2019-07-18 [2]
##  org.Hs.eg.db                                  * 3.10.0   2019-12-02 [2]
##  permute                                         0.9-5    2019-03-12 [2]
##  pillar                                          1.4.3    2019-12-20 [2]
##  pkgbuild                                        1.0.6    2019-10-09 [2]
##  pkgconfig                                       2.0.3    2019-09-22 [2]
##  pkgload                                         1.0.2    2018-10-29 [2]
##  plyr                                            1.8.6    2020-03-03 [2]
##  png                                             0.1-7    2013-12-03 [2]
##  preprocessCore                                  1.48.0   2019-10-29 [2]
##  prettyunits                                     1.1.1    2020-01-24 [2]
##  processx                                        3.4.2    2020-02-09 [2]
##  progress                                        1.2.2    2019-05-16 [2]
##  promises                                        1.1.0    2019-10-04 [2]
##  ProtGenerics                                    1.18.0   2019-10-29 [2]
##  ps                                              1.3.2    2020-02-13 [2]
##  purrr                                           0.3.3    2019-10-18 [2]
##  quadprog                                        1.5-8    2019-11-20 [2]
##  R.methodsS3                                     1.8.0    2020-02-14 [2]
##  R.oo                                            1.23.0   2019-11-03 [2]
##  R.utils                                         2.9.2    2019-12-08 [2]
##  R6                                              2.4.1    2019-11-12 [2]
##  randomForest                                    4.6-14   2018-03-25 [2]
##  rappdirs                                        0.3.1    2016-03-28 [2]
##  RColorBrewer                                    1.1-2    2014-12-07 [2]
##  Rcpp                                            1.0.4.6  2020-04-09 [2]
##  RCurl                                           1.98-1.1 2020-01-19 [2]
##  readr                                           1.3.1    2018-12-21 [2]
##  readxl                                          1.3.1    2019-03-13 [2]
##  remotes                                         2.1.1    2020-02-15 [2]
##  reshape                                         0.8.8    2018-10-23 [2]
##  reshape2                                      * 1.4.4    2020-04-09 [2]
##  rhdf5                                           2.30.1   2019-11-26 [2]
##  Rhdf5lib                                        1.8.0    2019-10-29 [2]
##  rlang                                           0.4.5    2020-03-01 [2]
##  rmarkdown                                       2.1      2020-01-20 [2]
##  rngtools                                        1.5      2020-01-23 [2]
##  ROC                                           * 1.62.0   2019-10-29 [2]
##  rpart                                           4.1-15   2019-04-12 [4]
##  RPMM                                          * 1.25     2017-02-28 [2]
##  rprojroot                                       1.3-2    2018-01-03 [2]
##  Rsamtools                                       2.2.3    2020-02-23 [2]
##  RSQLite                                         2.2.0    2020-01-07 [2]
##  rstudioapi                                      0.11     2020-02-07 [2]
##  rtracklayer                                     1.46.0   2019-10-29 [2]
##  S4Vectors                                     * 0.24.4   2020-04-09 [2]
##  scales                                        * 1.1.0    2019-11-18 [2]
##  scrime                                          1.3.5    2018-12-01 [2]
##  sesame                                        * 1.4.0    2019-10-29 [2]
##  sesameData                                    * 1.4.0    2019-11-05 [2]
##  sessioninfo                                     1.1.1    2018-11-05 [2]
##  shiny                                           1.4.0.2  2020-03-13 [2]
##  siggenes                                        1.60.0   2019-10-29 [2]
##  statmod                                         1.4.34   2020-02-17 [2]
##  stringi                                         1.4.6    2020-02-17 [2]
##  stringr                                         1.4.0    2019-02-10 [2]
##  SummarizedExperiment                          * 1.16.1   2019-12-19 [2]
##  survival                                        3.1-12   2020-04-10 [4]
##  testthat                                        2.3.2    2020-03-02 [2]
##  tibble                                          3.0.0    2020-03-30 [2]
##  tidyr                                           1.0.2    2020-01-24 [2]
##  tidyselect                                      1.0.0    2020-01-27 [2]
##  TxDb.Hsapiens.UCSC.hg19.knownGene             * 3.2.2    2019-12-10 [2]
##  usethis                                         1.6.0    2020-04-09 [2]
##  VariantAnnotation                               1.32.0   2019-10-29 [2]
##  vctrs                                           0.2.4    2020-03-10 [2]
##  wateRmelon                                    * 1.30.0   2019-10-29 [2]
##  wheatmap                                        0.1.0    2018-03-15 [2]
##  withr                                           2.1.2    2018-03-15 [2]
##  xfun                                            0.13     2020-04-13 [2]
##  XML                                             3.99-0.3 2020-01-20 [2]
##  xml2                                            1.3.1    2020-04-09 [2]
##  xtable                                          1.8-4    2019-04-21 [2]
##  XVector                                       * 0.26.0   2019-10-29 [2]
##  yaml                                            2.2.1    2020-02-01 [2]
##  zlibbioc                                        1.32.0   2019-10-29 [2]
##  source                             
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.0)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Github (Oshlack/missMethyl@5792fbc)
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
##  CRAN (R 3.6.1)                     
##  Bioconductor                       
## 
## [1] /home/tiagochst/R/x86_64-pc-linux-gnu-library/3.6
## [2] /usr/local/lib/R/site-library
## [3] /usr/lib/R/site-library
## [4] /usr/lib/R/library

ROSMAP dataset

Lanyu Zhang, Tiago C. Silva, Lissette Gomez, Lily Wang

2020-04-27

1 Data retrival

1.1 Download data

2 Data Pre-processing

3 Sample subset

4 Normalization

4.1 Quantile normalization

5 Outliers detection - PCA analysis

5.1 Filter samples by PCA, SAVE files

6 Summary after QC steps

6.1 Data and metadata

6.2 Numbers of samples and probes removed in each step

7 Compute neuron proportion

7.1 Get reference profile from Caucasions + controls

8 Linear regression by cpgs Methylation

8.1 Import datasets

8.2 Test all regions

9 Linear regression by regions median Methylation

9.1 Residuals control and coMethylated Regions

9.1.1 Take residuals

9.1.2 Find co-methylated regions

9.2 Linear regression by regions median Methylation

9.2.1 Calculate medians by cluster and sample

9.2.2 Test all regions – linear regressions

10 Data final

11 Session information