Rosmap dataset
rosmap <- pheno.ls$ROSMAP[, c("Sample", "msex", "braaksc", "age_death")]
rosmap$age <- rosmap$age_death
rosmap$status <- ifelse(rosmap$braaksc < 3, "Control", "Case")
rosmap$status <- as.factor(rosmap$status)
rosmap$sex <- ifelse(rosmap$msex == 1, "Male", "Female")
rosmap$sex <- as.factor(rosmap$sex)
str(rosmap)
## 'data.frame': 726 obs. of 7 variables:
## $ Sample : chr "PT-313T" "PT-3142" "PT-318X" "PT-35N6" ...
## $ msex : num 1 0 1 1 0 0 0 0 1 1 ...
## $ braaksc : num 1 5 4 6 3 3 3 4 4 3 ...
## $ age_death: num 84.4 90 87.9 89 90 ...
## $ age : num 84.4 90 87.9 89 90 ...
## $ status : Factor w/ 2 levels "Case","Control": 2 1 1 1 1 1 1 1 1 1 ...
## $ sex : Factor w/ 2 levels "Female","Male": 2 1 2 2 1 1 1 1 2 2 ...
{
set.seed(5) # matchControl uses a distance calculation, if ties, it uses sample function
m_ros <- matchControls(
formula = status ~ age + sex,
data = rosmap,
contlabel = "Case",
caselabel = "Control"
)
}
outFile <- data.frame(
"matchedControls" = rosmap[m_ros$cases, "Sample"],
"matchedCases" = rosmap[m_ros$controls, "Sample"]
)
outFileControl <- merge(
x = outFile,
y = rosmap[, c("Sample", "status", "age", "sex")],
by.x = "matchedControls",
by.y = "Sample"
)
colnames(outFileControl)[3:5] <- c("status_control", "age_control", "sex_control")
outFileControlCases <- merge(
x = outFileControl,
y = rosmap[, c("Sample", "status", "age", "sex")],
by.x = "matchedCases",
by.y = "Sample"
)
colnames(outFileControlCases)[6:8] <- c("status_case", "age_case", "sex_case")
outFileControlCases <- outFileControlCases[, c(2, 3, 4, 5, 1, 6, 7, 8)]
write.csv(
outFileControlCases,
paste0(dir.result, "Rosmap_matchedCaseControls.csv"),
row.names = FALSE
)
ROSMAP_matchAgeSex <- outFileControlCases %>%
dplyr::filter(abs(age_control - age_case) <= 1)
dim(ROSMAP_matchAgeSex)
## [1] 122 8
table(ROSMAP_matchAgeSex$status_control, ROSMAP_matchAgeSex$sex_control)
##
## Female Male
## Case 0 0
## Control 71 51
table(ROSMAP_matchAgeSex$status_case, ROSMAP_matchAgeSex$sex_case)
##
## Female Male
## Case 71 51
## Control 0 0
ROSMAP_samples <- c(
as.character(ROSMAP_matchAgeSex$matchedControls),
as.character(ROSMAP_matchAgeSex$matchedCases)
)
saveRDS(
ROSMAP_samples,
paste0(dir.result, "ROSMAP_samples.RDS")
)
Samples
length(readRDS(paste0(dir.result, "ROSMAP_samples.RDS")))
## [1] 244
Linear model
### Import datasets
cohort <- "ROSMAP"
pheno <- "pheno"
info_df <- readRDS(
dir(path = "../DATASETS/ROSMAP////", pattern = "info", recursive = TRUE, full.names = TRUE)
)
mediansMval_df <- readRDS(
dir(path = "../DATASETS/ROSMAP/", pattern = "medians", recursive = TRUE, full.names = TRUE)
)
pheno_df <- readRDS(
dir(path = "../DATASETS/ROSMAP/", pattern = "Neuron", recursive = TRUE, full.names = TRUE)
)
samples <- readRDS(
paste0(dir.result, "ROSMAP_samples.RDS")
)
### Limit samples
mediansMval_df <- mediansMval_df[, samples]
pheno_df <- pheno_df[pheno_df$Sample %in% samples, ]
mediansMval_df <- mediansMval_df[, pheno_df$Sample]
identical(pheno_df$Sample, colnames(mediansMval_df))
str(pheno_df)
pheno_df$age_death <- as.numeric(as.character(pheno_df$age_death))
pheno_df$msex <- as.factor(as.character(pheno_df$msex))
pheno_df$Slide <- as.factor(as.character(pheno_df$Slide))
pheno_df$batch <- as.factor(as.character(pheno_df$batch))
predictors_char <- "braaksc"
covariates_char <- c("prop.neuron", "batch")
res_df <- TestAllRegions_noInfo(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
summarizedRegions_df = mediansMval_df,
cores = 4
)
colnames(res_df) <- c(
paste0(cohort, "_estimate"),
paste0(cohort, "_se"),
paste0(cohort, "_pVal"),
paste0(cohort, "_fdr")
)
res_withInfo_df <- cbind(info_df, res_df)
saveRDS(
res_withInfo_df,
paste0(dir.result, cohort, "_matched_linear_df.rds")
)
Single cpg linear model
### Import datasets
beta_mat <- readRDS(
"../DATASETS/ROSMAP/step7_pca_filtering/ROSMAP_QNBMIQ_PCfiltered.RDS"
)
pheno_df <- readRDS(
dir(path = "../DATASETS/ROSMAP/", pattern = "Neuron", recursive = TRUE, full.names = TRUE)
)
samples <- readRDS(
paste0(dir.result, "ROSMAP_samples.RDS")
)
### Limit samples
beta_mat <- beta_mat[, samples]
pheno_df <- pheno_df[pheno_df$Sample %in% samples, ]
beta_mat <- beta_mat[, pheno_df$Sample]
identical(pheno_df$Sample, colnames(beta_mat))
## [1] TRUE
### Compute M values
mval_mat <- log2(beta_mat / (1 - beta_mat))
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$slide <- as.factor(pheno_df$Sentrix_ID)
pheno_df$batch <- as.factor(pheno_df$batch)
str(pheno_df)
## 'data.frame': 244 obs. of 29 variables:
## $ Sample : chr "PT-313T" "PT-318X" "PT-35PM" "PT-BY89" ...
## $ Sentrix_ID : num 5.82e+09 6.04e+09 5.82e+09 5.82e+09 5.82e+09 ...
## $ Sentrix_Position : chr "R04C01" "R02C02" "R05C01" "R04C02" ...
## $ Sample_Plate : chr "WG0001091-MSA4" "WG0003259-MSA4" "WG0001091-MSA4" "WG0001338-MSA4" ...
## $ Sample_Well : chr "H08" "H01" "A03" "F12" ...
## $ Sample_Group : chr "PT-313T" "PT-318X" "PT-35PM" "PT-BY89" ...
## $ batch : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 1 1 2 ...
## $ Sentrix : chr "5822020006_R04C01" "6042324057_R02C02" "5822020007_R05C01" "5822020002_R04C02" ...
## $ Slide : num 5.82e+09 6.04e+09 5.82e+09 5.82e+09 5.82e+09 ...
## $ projid : num 11464261 3713990 22209673 75990666 32705437 ...
## $ Study : chr "ROS" "MAP" "ROS" "MAP" ...
## $ msex : num 1 1 0 1 1 1 0 0 1 1 ...
## $ educ : num 16 12 18 12 16 13 16 16 14 13 ...
## $ race : Factor w/ 4 levels "1","2","3","7": 1 1 2 1 1 1 1 1 1 1 ...
## $ spanish : Factor w/ 2 levels "1","2": 2 2 2 2 2 2 2 2 2 2 ...
## $ apoe_genotype : Factor w/ 6 levels "22","23","24",..: 2 4 5 5 4 4 4 4 2 4 ...
## $ age_at_visit_max : chr "83.507186858316217" "87.668720054757017" "68.668035592060235" "87.463381245722104" ...
## $ age_first_ad_dx : chr NA NA NA NA ...
## $ age_death : num 84.4 87.9 69.2 88 90 ...
## $ cts_mmse30_first_ad_dx: num NA NA NA NA NA NA NA NA NA NA ...
## $ cts_mmse30_lv : num 24 30 26 12 24 27 29 28 29 27 ...
## $ pmi : num 4 4.33 8.5 7.58 13.47 ...
## $ braaksc : num 1 4 3 4 4 1 2 2 1 2 ...
## $ ceradsc : num 4 2 2 2 1 3 4 2 4 4 ...
## $ cogdx : num 4 1 6 5 2 3 1 1 1 1 ...
## $ stage3 : chr "0-2" "3-4" "3-4" "3-4" ...
## $ sex : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 2 2 ...
## $ prop.neuron : num 0.458 0.395 0.41 0.401 0.569 ...
## $ slide : Factor w/ 65 levels "5772325072","5772325089",..: 30 65 31 26 44 28 17 10 3 38 ...
is(pheno_df$braaksc,"numeric")
## [1] TRUE
is(pheno_df$prop.neuron,"numeric")
## [1] TRUE
predictors_char <- "braaksc"
covariates_char <- c("prop.neuron", "batch")
doParallel::registerDoParallel(cores = parallel::detectCores()/2)
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
results_ordered_df <- plyr::adply(mval_mat,1, function(row){
sumOneRegion_df <- as.data.frame(t(row))
result <- TestSingleRegion(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
sumOneRegion_df = sumOneRegion_df
)
result
}, .progress = "time",.parallel = TRUE,.id = "cpg")
colnames(results_ordered_df)[1] <- "cpg"
identical(row.names(mval_mat), results_ordered_df$cpg %>% as.character())
write.csv(
results_ordered_df,
paste0(dir.result, "ROSMAP_matched_single_cpg_linear_df.csv"),
row.names = FALSE
)
results_ordered_df <- readr::read_csv(
paste0(dir.result, "ROSMAP_matched_single_cpg_linear_df.csv"),
col_types = readr::cols()
)
dim(results_ordered_df)
## [1] 431803 4
results_ordered_df %>% head
London dataset
london <- pheno.ls$LONDON[, c("sample", "sex", "stage", "age.brain")]
london$status <- ifelse(london$stage < 3, "Control", "Case")
london$status <- as.factor(london$status)
london$sex <- as.factor(london$sex)
str(london)
{
set.seed(5)
m_london <- matchControls(
formula = status ~ age.brain + sex,
data = london,
contlabel = "Case",
caselabel = "Control"
)
}
outFile <- data.frame(
"matchedControls" = london[m_london$cases, "sample"],
"matchedCases" = london[m_london$controls, "sample"]
)
outFileControl <- merge(
x = outFile,
y = london[, c("sample", "status", "age.brain", "sex")],
by.x = "matchedControls",
by.y = "sample"
)
colnames(outFileControl)[3:5] <- c("status_control", "age_control", "sex_control")
outFileControlCases <- merge(
x = outFileControl,
y = london[, c("sample", "status", "age.brain", "sex")],
by.x = "matchedCases",
by.y = "sample"
)
colnames(outFileControlCases)[6:8] <- c("status_case", "age_case", "sex_case")
outFileControlCases <- outFileControlCases[, c(2, 3, 4, 5, 1, 6, 7, 8)]
write.csv(
outFileControlCases,
paste0(dir.result, "London_matchedCaseControls.csv"),
row.names = FALSE
)
London_matchAgeSex <- outFileControlCases %>%
dplyr::filter(abs(age_control - age_case) <= 1)
dim(London_matchAgeSex)
table(London_matchAgeSex$status_control, London_matchAgeSex$sex_control)
table(London_matchAgeSex$status_case, London_matchAgeSex$sex_case)
London_samples <- c(
as.character(London_matchAgeSex$matchedControls),
as.character(London_matchAgeSex$matchedCases)
)
saveRDS(
London_samples,
paste0(dir.result, "London_samples.RDS")
)
Samples
length(readRDS(paste0(dir.result, "London_samples.RDS")))
## [1] 46
Linear model
### Import datasets
cohort <- "London"
pheno <- "pheno"
info_df <- readRDS(
dir(path = "../DATASETS/LONDON///", pattern = "info", recursive = TRUE, full.names = TRUE)
)
mediansMval_df <- readRDS(
dir(path = "../DATASETS/LONDON/", pattern = "medians", recursive = TRUE, full.names = TRUE)
)
pheno_df <- readRDS(
dir(path = "../DATASETS/LONDON/", pattern = "NeuronProp_df", recursive = TRUE, full.names = TRUE)
)
samples <- readRDS(
paste0(dir.result, "London_samples.RDS")
)
### Limit samples
mediansMval_df <- mediansMval_df[, samples]
pheno_df <- pheno_df[pheno_df$sample %in% samples, ]
mediansMval_df <- mediansMval_df[, pheno_df$sample]
### Check variables before fitting model
pheno_df$Sample <- pheno_df$sample
identical(pheno_df$Sample, colnames(mediansMval_df))
## [1] TRUE
str(pheno_df)
## 'data.frame': 46 obs. of 10 variables:
## $ sample : chr "GSM1443251" "GSM1443256" "GSM1443263" "GSM1443269" ...
## $ subject.id : num 1 2 4 5 6 7 9 10 13 18 ...
## $ sentrix_id : chr "6042316048_R05C01" "6042316066_R05C01" "7786923107_R02C01" "6042316121_R04C02" ...
## $ slide : chr "6042316048" "6042316066" "7786923107" "6042316121" ...
## $ age.brain : num 82 82 81 92 78 86 90 82 80 82 ...
## $ sex : chr "FEMALE" "FEMALE" "FEMALE" "FEMALE" ...
## $ stage : num 0 2 1 2 1 3 6 6 5 6 ...
## $ stage3 : chr "0-2" "0-2" "0-2" "0-2" ...
## $ prop.neuron: num 0.377 0.226 0.206 0.456 0.424 ...
## $ Sample : chr "GSM1443251" "GSM1443256" "GSM1443263" "GSM1443269" ...
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$slide <- as.factor(pheno_df$slide)
# If rosmap cohort, don't forget batch effect
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
predictors_char <- "stage"
covariates_char <- "prop.neuron"
res_df <- TestAllRegions_noInfo(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
summarizedRegions_df = mediansMval_df,
cores = 4
)
colnames(res_df) <- c(
paste0(cohort, "_estimate"),
paste0(cohort, "_se"),
paste0(cohort, "_pVal"),
paste0(cohort, "_fdr")
)
res_withInfo_df <- cbind(info_df, res_df)
saveRDS(
res_withInfo_df,
paste0(dir.result, cohort, "_matched_linear_df.rds")
)
Single cpg linear model
### Import datasets
beta_mat <- readRDS(
"../DATASETS/LONDON/step5_pca_filtering/London_PFC_QNBMIQ_PCfiltered.RDS"
)
pheno_df <- readRDS(
dir("../DATASETS/LONDON/",pattern = "NeuronProp_df",recursive = TRUE,full.names = TRUE)
)
samples <- readRDS(
paste0(dir.result, "London_samples.RDS")
)
### Limit samples
pheno_df$Sample <- pheno_df$sample
beta_mat <- beta_mat[, samples]
pheno_df <- pheno_df[pheno_df$Sample %in% samples, ]
beta_mat <- beta_mat[, pheno_df$Sample]
identical(pheno_df$Sample, colnames(beta_mat))
## [1] TRUE
### Compute M values
mval_mat <- log2(beta_mat / (1 - beta_mat))
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$slide <- as.factor(pheno_df$slide)
str(pheno_df)
## 'data.frame': 46 obs. of 10 variables:
## $ sample : chr "GSM1443251" "GSM1443256" "GSM1443263" "GSM1443269" ...
## $ subject.id : num 1 2 4 5 6 7 9 10 13 18 ...
## $ sentrix_id : chr "6042316048_R05C01" "6042316066_R05C01" "7786923107_R02C01" "6042316121_R04C02" ...
## $ slide : Factor w/ 12 levels "6042316035","6042316048",..: 2 3 9 8 6 1 5 11 7 2 ...
## $ age.brain : num 82 82 81 92 78 86 90 82 80 82 ...
## $ sex : Factor w/ 2 levels "FEMALE","MALE": 1 1 1 1 2 2 1 2 1 1 ...
## $ stage : num 0 2 1 2 1 3 6 6 5 6 ...
## $ stage3 : chr "0-2" "0-2" "0-2" "0-2" ...
## $ prop.neuron: num 0.377 0.226 0.206 0.456 0.424 ...
## $ Sample : chr "GSM1443251" "GSM1443256" "GSM1443263" "GSM1443269" ...
is(pheno_df$stage,"numeric")
## [1] TRUE
is(pheno_df$prop.neuron,"numeric")
## [1] TRUE
predictors_char <- "stage"
covariates_char <- "prop.neuron"
doParallel::registerDoParallel(cores = parallel::detectCores()/2)
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
results_ordered_df <- plyr::adply(mval_mat,1, function(row){
sumOneRegion_df <- as.data.frame(t(row))
result <- TestSingleRegion(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
sumOneRegion_df = sumOneRegion_df
)
result
}, .progress = "time", .parallel = TRUE, .id = "cpg")
colnames(results_ordered_df)[1] <- "cpg"
identical(row.names(mval_mat), results_ordered_df$cpg %>% as.character())
write.csv(
results_ordered_df,
paste0(dir.result, "London_matched_single_cpg_linear_df.csv"),
row.names = FALSE
)
results_ordered_df <- readr::read_csv(
paste0(dir.result, "London_matched_single_cpg_linear_df.csv"),
col_types = readr::cols()
)
dim(results_ordered_df)
## [1] 450793 4
results_ordered_df %>% head
Mt.Sinai dataset
mtsinai <- pheno.ls$MtSinai[, c("sample", "sex", "stage", "age.brain")]
mtsinai$status <- ifelse(mtsinai$stage < 3, "Control", "Case")
mtsinai$status <- as.factor(mtsinai$status)
mtsinai$sex <- as.factor(mtsinai$sex)
str(mtsinai)
{
set.seed(5)
m_mtsinai <- matchControls(
formula = status ~ age.brain + sex,
data = mtsinai,
contlabel = "Case",
caselabel = "Control"
)
}
outFile <- data.frame(
"matchedControls" = mtsinai[m_mtsinai$cases, "sample"],
"matchedCases" = mtsinai[m_mtsinai$controls, "sample"]
)
outFileControl <- merge(
outFile,
mtsinai[, c("sample", "status", "age.brain", "sex")],
by.x = "matchedControls",
by.y = "sample"
)
colnames(outFileControl)[3:5] <- c("status_control", "age_control", "sex_control")
outFileControlCases <- merge(
outFileControl,
mtsinai[, c("sample", "status", "age.brain", "sex")],
by.x = "matchedCases",
by.y = "sample"
)
colnames(outFileControlCases)[6:8] <- c("status_case", "age_case", "sex_case")
outFileControlCases <- outFileControlCases[, c(2, 3, 4, 5, 1, 6, 7, 8)]
write.csv(
outFileControlCases,
paste0(dir.result, "MtSinai_matchedCaseControls.csv"),
row.names = FALSE
)
MtSinai_matchAgeSex <- outFileControlCases %>%
dplyr::filter(abs(age_control - age_case) <= 1)
dim(MtSinai_matchAgeSex)
table(MtSinai_matchAgeSex$status_control, MtSinai_matchAgeSex$sex_control)
table(MtSinai_matchAgeSex$status_case, MtSinai_matchAgeSex$sex_case)
MtSinai_samples <- c(
as.character(MtSinai_matchAgeSex$matchedControls),
as.character(MtSinai_matchAgeSex$matchedCases)
)
saveRDS(
MtSinai_samples,
paste0(dir.result, "MtSinai_samples.RDS")
)
Samples
length(readRDS(paste0(dir.result, "MtSinai_samples.RDS")))
## [1] 56
Linear model
### Import datasets
cohort <- "MtSinai"
pheno <- "pheno"
info_df <- readRDS(
dir("../DATASETS/MtSinai//",pattern = "info",recursive = TRUE,full.names = TRUE)
)
mediansMval_df <- readRDS(
dir("../DATASETS/MtSinai/",pattern = "medians",recursive = TRUE,full.names = TRUE)
)
pheno_df <- readRDS(
dir("../DATASETS/MtSinai/",pattern = "NeuronProp",recursive = TRUE,full.names = TRUE)
)
samples <- readRDS(
paste0(dir.result, "MtSinai_samples.RDS")
)
### Limit samples
mediansMval_df <- mediansMval_df[, samples]
pheno_df <- pheno_df[pheno_df$sample %in% samples, ]
mediansMval_df <- mediansMval_df[, pheno_df$sample]
### Check variables before fitting model
pheno_df$Sample <- pheno_df$sample
identical(pheno_df$Sample, colnames(mediansMval_df))
## [1] TRUE
str(pheno_df)
## 'data.frame': 56 obs. of 9 variables:
## $ sample : chr "GSM2139163" "GSM2139164" "GSM2139166" "GSM2139170" ...
## $ subject.id : chr "MS66" "MS546" "MS1549" "MS842" ...
## $ sentrix_id : chr "7796806144_R01C01" "7796806144_R01C02" "7796806144_R03C01" "7796806144_R05C01" ...
## $ slide : chr "7796806144" "7796806144" "7796806144" "7796806144" ...
## $ age.brain : num 89 80 78 83 78 73 81 93 93 73 ...
## $ sex : chr "male" "female" "male" "female" ...
## $ stage : num 5 2 4 1 2 6 2 6 2 6 ...
## $ prop.neuron: num 0.243 0.202 0.304 0.329 0.235 ...
## $ Sample : chr "GSM2139163" "GSM2139164" "GSM2139166" "GSM2139170" ...
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$slide <- as.factor(pheno_df$slide)
# If rosmap cohort, don't forget batch effect
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
predictors_char <- "stage"
covariates_char <- "prop.neuron"
res_df <- TestAllRegions_noInfo(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
summarizedRegions_df = mediansMval_df,
cores = 4
)
colnames(res_df) <- c(
paste0(cohort, "_estimate"),
paste0(cohort, "_se"),
paste0(cohort, "_pVal"),
paste0(cohort, "_fdr")
)
res_withInfo_df <- cbind(info_df, res_df)
saveRDS(
res_withInfo_df,
paste0(dir.result, cohort, "_matched_linear_df.rds")
)
Single cpg linear model
### Import datasets
beta_mat <- readRDS(
"../DATASETS/MtSinai/step5_pca_filtering/MtSinai_QNBMIQ_PCfiltered.RDS"
)
pheno_df <- readRDS(
dir("../DATASETS/MtSinai/",pattern = "Neuron",recursive = TRUE,full.names = TRUE)
)
samples <- readRDS(
paste0(dir.result, "MtSinai_samples.RDS")
)
### Limit samples
pheno_df$Sample <- pheno_df$sample
beta_mat <- beta_mat[, samples]
pheno_df <- pheno_df[pheno_df$Sample %in% samples, ]
beta_mat <- beta_mat[, pheno_df$Sample]
identical(pheno_df$Sample, colnames(beta_mat))
## [1] TRUE
### Compute M values
mval_mat <- log2(beta_mat / (1 - beta_mat))
pheno_df$sex <- as.factor(pheno_df$sex)
pheno_df$slide <- as.factor(pheno_df$slide)
str(pheno_df)
## 'data.frame': 56 obs. of 9 variables:
## $ sample : chr "GSM2139163" "GSM2139164" "GSM2139166" "GSM2139170" ...
## $ subject.id : chr "MS66" "MS546" "MS1549" "MS842" ...
## $ sentrix_id : chr "7796806144_R01C01" "7796806144_R01C02" "7796806144_R03C01" "7796806144_R05C01" ...
## $ slide : Factor w/ 12 levels "7796806144","7796814008",..: 1 1 1 1 1 2 2 2 2 2 ...
## $ age.brain : num 89 80 78 83 78 73 81 93 93 73 ...
## $ sex : Factor w/ 2 levels "female","male": 2 1 2 1 2 2 2 1 1 1 ...
## $ stage : num 5 2 4 1 2 6 2 6 2 6 ...
## $ prop.neuron: num 0.243 0.202 0.304 0.329 0.235 ...
## $ Sample : chr "GSM2139163" "GSM2139164" "GSM2139166" "GSM2139170" ...
is(pheno_df$stage,"numeric")
## [1] TRUE
is(pheno_df$prop.neuron,"numeric")
## [1] TRUE
predictors_char <- "stage"
covariates_char <- "prop.neuron"
doParallel::registerDoParallel(cores = parallel::detectCores()/2)
devtools::source_gist("https://gist.github.com/tiagochst/d3a7b1639acf603916c315d23b1efb3e")
results_ordered_df <- plyr::adply(mval_mat,1, function(row){
sumOneRegion_df <- as.data.frame(t(row))
result <- TestSingleRegion(
predictors_char = predictors_char,
covariates_char = covariates_char,
pheno_df = pheno_df,
sumOneRegion_df = sumOneRegion_df
)
result
}, .progress = "time",.parallel = TRUE,.id = "cpg")
colnames(results_ordered_df)[1] <- "cpg"
identical(row.names(mval_mat), results_ordered_df$cpg %>% as.character())
write.csv(
results_ordered_df,
paste0(dir.result, "MtSinai_matched_single_cpg_linear_df.csv"),
row.names = FALSE
)
results_ordered_df <- readr::read_csv(
paste0(dir.result, "MtSinai_matched_single_cpg_linear_df.csv"),
col_types = readr::cols()
)
dim(results_ordered_df)
## [1] 434610 4
results_ordered_df %>% head