suppressPackageStartupMessages({library(MOFA2)
library(kableExtra)
library(data.table)
library(ggplot2)
library(tidyverse)
library(readr)
library(here)
library(matrixStats)
library(imputeTS)
# library('ComplexUpset')
# library("ComplexHeatmap")
library(mice)
# library(VIM)
library(impute)
library(BatchQC)
library(sva)
source("./src/codebase.R")})
dist_histogram <- function(df, col, title){
ggplot(df, aes_string(x=col)) +
geom_histogram(aes(y=..density..),
binwidth=.5,
colour="black", fill="white") +
geom_density(alpha=.2, fill="#FF6666", adjust = 3)+
ggtitle(title)
}
histogram <- function(df, col, title){
ggplot(df, aes_string(x=col)) +
geom_histogram(binwidth=.5,
colour="black",
fill="paleturquoise1")+
ggtitle(title)
}
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
subject_2020 <- read_tsv(here("./data/2020LD_subject.tsv"), show_col_types = FALSE)
subject_2021 <- read_tsv(here("./data/2021LD_subject.tsv"), show_col_types = FALSE)
specimen_2020 <- read_tsv(here("./data/2020LD_specimen.tsv"), show_col_types = FALSE)
specimen_2021 <- read_tsv(here("./data/2021LD_specimen.tsv"), show_col_types = FALSE)
metaDf_2020 <- specimen_2020 %>%
left_join(subject_2020) %>%
mutate(timepoint= planned_day_relative_to_boost)
## Joining with `by = join_by(subject_id)`
metaDf_2021 <- specimen_2021 %>%
left_join(subject_2021) %>%
mutate(timepoint= planned_day_relative_to_boost)
## Joining with `by = join_by(subject_id)`
metaDf_2021 <- metaDf_2021[metaDf_2021$timepoint %in% c(0, 1, 3, 7, 14),]
batch.factors <- c("timepoint","infancy_vac","biological_sex")
aMeta <- rbind(metaDf_2020, metaDf_2021)
aMeta[batch.factors] <- lapply(aMeta[batch.factors], factor)
mGenes <- read_tsv(here("./data/raw.githubusercontent.com_CMI-PB_second-challenge-train-dataset-preparation_main_data_gene_90_38_export.tsv"), show_col_types = FALSE) %>%
# filter(substr(display_label, 1,3) == "MT-")
data.frame() %>%
filter(grepl('MT-', display_label)) %>%
select(versioned_ensembl_gene_id)
mGenes <- as.vector(mGenes["versioned_ensembl_gene_id"])
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
rna_2020 <- read_tsv(here("./data/2020LD_pbmc_gene_expression.tsv"),
show_col_types = FALSE) %>%
select(specimen_id, versioned_ensembl_gene_id, tpm) %>%
filter(!(versioned_ensembl_gene_id %in% mGenes)) %>%
mutate(versioned_ensembl_gene_id=gsub("\\..*", "", versioned_ensembl_gene_id))
rna_2021 <- read_tsv(here("./data/2021LD_pbmc_gene_expression.tsv"),
show_col_types = FALSE) %>%
select(specimen_id, versioned_ensembl_gene_id, tpm) %>%
filter(!(versioned_ensembl_gene_id %in% mGenes)) %>%
mutate(versioned_ensembl_gene_id=gsub("\\..*", "", versioned_ensembl_gene_id))
tpm_shortlist <- rna_2020 %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
group_by(versioned_ensembl_gene_id) %>%
summarise(proportion = mean(tpm >= 1)) %>%
filter(proportion >= 0.8)
rna_2020 <- rna_2020 %>%
filter(versioned_ensembl_gene_id %in% tpm_shortlist$versioned_ensembl_gene_id) %>%
pivot_wider(names_from = versioned_ensembl_gene_id, values_from = tpm) %>%
column_to_rownames("specimen_id") %>%
select(where(~n_distinct(.) > 1))
tpm_shortlist <- rna_2021 %>%
mutate(specimen_id = as.integer(specimen_id)) %>%
group_by(versioned_ensembl_gene_id) %>%
summarise(proportion = mean(tpm >= 1)) %>%
filter(proportion >= 0.8)
rna_2021 <- rna_2021 %>%
filter(versioned_ensembl_gene_id %in% tpm_shortlist$versioned_ensembl_gene_id) %>%
pivot_wider(names_from = versioned_ensembl_gene_id, values_from = tpm) %>%
column_to_rownames("specimen_id") %>%
select(where(~n_distinct(.) > 1))
rna_features <- intersect(colnames(rna_2020), colnames(rna_2021))
rna_2021 <- rna_2021 %>%
as.data.frame() %>%
select(rna_features)
rna_2020 <- rna_2020 %>%
as.data.frame() %>%
select(rna_features)
# rownames(rna_2020) <- gsub("X", "", rownames(rna_2020))
# rownames(rna_2021) <- gsub("X", "", rownames(rna_2021))
rna_2020_logTrans <- log2(rna_2020+1)
rna_2021_logTrans <- log2(rna_2021+1)
mRna <- melt(rna_2020)
dist_histogram(mRna, "value", "RNA seq Datasets 2020")
outliers <- boxplot(mRna$value, plot=FALSE)$out
dist_histogram(mRna[-which(mRna$value %in% outliers),], "value",
"RNA seq Outlier Removed Datasets 2020")
mRna <- melt(rna_2020_logTrans)
dist_histogram(mRna, "value", "log2 transformed RNA seq Datasets 2020")
mRna <- melt(rna_2021)
dist_histogram(mRna, "value", "RNA seq Datasets 2021")
outliers <- boxplot(mRna$value, plot=FALSE)$out
dist_histogram(mRna[-which(mRna$value %in% outliers),], "value",
"RNA seq Outlier Removed Datasets 2021")
mRna <- melt(rna_2021_logTrans)
dist_histogram(mRna, "value", "log2 transformed RNA seq Datasets 2021")
mRna <- melt(rna_2020)
histogram(mRna, "value", "RNA seq Datasets 2020")
outliers <- boxplot(mRna$value, plot=FALSE)$out
histogram(mRna[-which(mRna$value %in% outliers),], "value",
"RNA seq Outlier Removed Datasets 2020")
mRna <- melt(rna_2020_logTrans)
histogram(mRna, "value", "log2 transformed RNA seq Datasets 2020")
mRna <- melt(rna_2021)
histogram(mRna, "value", "RNA seq Datasets 2021")
outliers <- boxplot(mRna$value, plot=FALSE)$out
histogram(mRna[-which(mRna$value %in% outliers),], "value",
"RNA seq Outlier Removed Datasets 2021")
mRna <- melt(rna_2021_logTrans)
histogram(mRna, "value", "log2 transformed RNA seq Datasets 2021")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
rna <- rbind(rna_2020, rna_2021)
aData <- na.omit(rna) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run_rnaseq(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "RNA seq Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "RNA seq Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
rna <- rbind(rna_2020_logTrans, rna_2021_logTrans)
aData <- na.omit(rna) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run_rnaseq(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Log transformed RNA seq Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Log transformed RNA seq Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
abtiter_2020 <- read_tsv(here("./data/2020LD_plasma_ab_titer.tsv"),
show_col_types = FALSE) %>%
filter(!isotype %in% "IgE") %>%
mutate(isotype_antigen = paste0(isotype, "_", antigen)) %>%
select(specimen_id, isotype_antigen, MFI_normalised) %>%
pivot_wider(names_from = isotype_antigen, values_from = MFI_normalised) %>%
column_to_rownames("specimen_id") %>%
select(where(~n_distinct(.) > 1))
abtiter_2021 <- read_tsv(here("./data/2021LD_plasma_ab_titer.tsv"),
show_col_types = FALSE) %>%
filter(!isotype %in% "IgE") %>%
mutate(isotype_antigen = paste0(isotype, "_", antigen)) %>%
select(specimen_id, isotype_antigen, MFI_normalised) %>%
pivot_wider(names_from = isotype_antigen, values_from = MFI_normalised) %>%
column_to_rownames("specimen_id") %>%
select(where(~n_distinct(.) > 1))
abtiter_features <- intersect(colnames(abtiter_2020), colnames(abtiter_2021))
abtiter_2021 <- abtiter_2021 %>%
select(abtiter_features)
abtiter_2020 <- abtiter_2020 %>%
select(abtiter_features)
abtiter_2020_logTrans <- log2(abtiter_2020+1)
abtiter_2021_logTrans <- log2(abtiter_2021+1)
mAbtiter <- melt(abtiter_2020)
dist_histogram(mAbtiter, "value", "Abtiter Datasets 2020")
outliers <- boxplot(mAbtiter$value, plot=FALSE)$out
dist_histogram(mAbtiter[-which(mAbtiter$value %in% outliers),], "value",
"Abtiter Outlier Removed Datasets 2020")
mAbtiter <- melt(abtiter_2020_logTrans)
dist_histogram(mAbtiter, "value", "log2 transformed Abtiter Datasets 2020")
mAbtiter <- melt(abtiter_2021)
dist_histogram(mAbtiter, "value", "Abtiter Datasets 2021")
outliers <- boxplot(mAbtiter$value, plot=FALSE)$out
dist_histogram(mAbtiter[-which(mAbtiter$value %in% outliers),], "value",
"Abtiter Outlier Removed Datasets 2021")
mAbtiter <- melt(abtiter_2021_logTrans)
dist_histogram(mAbtiter, "value", "log2 transformed Abtiter Datasets 2021")
mAbtiter <- melt(abtiter_2020)
histogram(mAbtiter, "value", "Abtiter Datasets 2020")
outliers <- boxplot(mAbtiter$value, plot=FALSE)$out
histogram(mAbtiter[-which(mAbtiter$value %in% outliers),], "value",
"Abtiter Outlier Removed Datasets 2020")
mAbtiter <- melt(abtiter_2020_logTrans)
histogram(mAbtiter, "value", "log2 transformed Abtiter Datasets 2020")
mAbtiter <- melt(abtiter_2021)
histogram(mAbtiter, "value", "Abtiter Datasets 2021")
outliers <- boxplot(mAbtiter$value, plot=FALSE)$out
histogram(mAbtiter[-which(mAbtiter$value %in% outliers),], "value",
"Abtiter Outlier Removed Datasets 2021")
mAbtiter <- melt(abtiter_2021_logTrans)
histogram(mAbtiter, "value", "log2 transformed Abtiter Datasets 2021")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
abtiter <- rbind(abtiter_2020, abtiter_2021)
aData <- na.omit(abtiter) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Ab-titer Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Ab-titer Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
abtiter <- rbind(abtiter_2020, abtiter_2021)
aData <- na.omit(abtiter) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Log transformed Ab-titer Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Log transformed Ab-titer Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cytof_2021 <- read_tsv(here("./data/2021LD_pbmc_cell_frequency.tsv"),
show_col_types = FALSE) %>%
select(specimen_id, cell_type_name, percent_live_cell) %>%
pivot_wider(names_from = cell_type_name,
values_from = percent_live_cell) %>%
column_to_rownames("specimen_id") %>%
select(where(~n_distinct(.) > 1))%>%
as.matrix() %>%
impute.knn() %>%
.$data
cytof_2020 <- read_tsv(here("./data/2020LD_pbmc_cell_frequency.tsv"),
show_col_types = FALSE) %>%
select(specimen_id, cell_type_name, percent_live_cell) %>%
pivot_wider(names_from = cell_type_name,
values_from = percent_live_cell) %>%
column_to_rownames("specimen_id") %>%
select(where(~n_distinct(.) > 1))%>%
as.matrix() %>%
impute.knn() %>%
.$data
cell_features <- intersect(colnames(cytof_2020), colnames(cytof_2021))
cytof_2021 <- as.data.frame(cytof_2021) %>%
select(cell_features)
cytof_2020 <- as.data.frame(cytof_2020) %>%
select(cell_features)
cytof_medianD0_2021 <- cytof_2021 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "cell_type_name", values_to = "count") %>%
filter(specimen_id %in% unique(metaDf_2021[metaDf_2021$planned_day_relative_to_boost == 0,]$specimen_id)) %>%
group_by(cell_type_name) %>%
summarise(median = median(count, na.rm = T))
cytof_2021_normalized <- cytof_2021 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "cell_type_name",
values_to = "percent_live_cell") %>%
left_join(cytof_medianD0_2021) %>%
mutate(percent_live_cell_normalized = if_else(is.na(percent_live_cell) == T,
NA, percent_live_cell/median))%>%
select(specimen_id, cell_type_name, percent_live_cell_normalized) %>%
pivot_wider(names_from = cell_type_name,
values_from = percent_live_cell_normalized) %>%
column_to_rownames("specimen_id")
cytof_medianD0_2020 <- cytof_2020 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "cell_type_name",
values_to = "count") %>%
filter(specimen_id %in% unique(metaDf_2020[metaDf_2020$planned_day_relative_to_boost == 0,]$specimen_id)) %>%
group_by(cell_type_name) %>%
summarise(median = median(count, na.rm = T))
cytof_2020_normalized <- cytof_2020 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "cell_type_name",
values_to = "percent_live_cell") %>%
left_join(cytof_medianD0_2020) %>%
mutate(percent_live_cell_normalized = if_else(is.na(percent_live_cell) == T,
NA, percent_live_cell/median))%>%
select(specimen_id, cell_type_name, percent_live_cell_normalized) %>%
pivot_wider(names_from = cell_type_name,
values_from = percent_live_cell_normalized) %>%
column_to_rownames("specimen_id")
cytof_2020_sqrtTrans <- sqrt(cytof_2020_normalized)
cytof_2021_sqrtTrans <- sqrt(cytof_2021_normalized)
mCytof <- melt(cytof_2020)
dist_histogram(mCytof, "value", "Cell Frequency Datasets 2020")
mCytof <- melt(cytof_2020_normalized)
dist_histogram(mCytof, "value",
"Cell Frequency Normalized Datasets 2020")
mCytof <- melt(cytof_2020_sqrtTrans)
dist_histogram(mCytof, "value", "Square root transformed Cell Frequency Datasets 2020")
mCytof <- melt(cytof_2021)
dist_histogram(mCytof, "value", "Cell Frequency Datasets 2021")
mCytof <- melt(cytof_2021_normalized)
dist_histogram(mCytof, "value",
"Cell Frequency Normalized Datasets 2021")
mCytof <- melt(cytof_2021_sqrtTrans)
dist_histogram(mCytof, "value", "Square root transformed Cell Frequency Datasets 2021")
mCytof <- melt(cytof_2020)
histogram(mCytof, "value", "Cell Frequency Datasets 2020")
mCytof <- melt(cytof_2020_normalized)
histogram(mCytof, "value",
"Cell Frequency Normalized Datasets 2020")
mCytof <- melt(cytof_2020_sqrtTrans)
histogram(mCytof, "value", "Square root transformed Cell Frequency Datasets 2020")
mCytof <- melt(cytof_2021)
histogram(mCytof, "value", "Cell Frequency Datasets 2021")
mCytof <- melt(cytof_2021_normalized)
histogram(mCytof, "value",
"Cell Frequency Normalized Datasets 2021")
mCytof <- melt(cytof_2021_sqrtTrans)
histogram(mCytof, "value", "Square root transformed Cell Frequency Datasets 2021")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cytof <- rbind(cytof_2020, cytof_2021)
aData <- na.omit(cytof) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Cell Frequency Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Cell Frequency Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cytof <- rbind(cytof_2020_normalized, cytof_2021_normalized)
aData <- na.omit(cytof) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Normalized Cell Frequency Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Normalized Cell Frequency Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cytof <- rbind(cytof_2020_sqrtTrans, cytof_2021_sqrtTrans)
aData <- na.omit(cytof) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Square root Cell Frequency Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Square root Cell Frequency Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
olink_2020 <- read_tsv(here("./data/2020LD_plasma_cytokine_concentration.tsv"), show_col_types = FALSE) %>%
filter(protein_id != "P05112") %>%
select(specimen_id, protein_id, protein_expression) %>%
pivot_wider(names_from = protein_id,
values_from = protein_expression) %>%
column_to_rownames("specimen_id") %>%
as.matrix() %>%
impute.knn() %>%
.$data
olink_2021 <- read_tsv(here("./data/2021LD_plasma_cytokine_concentration.tsv"), show_col_types = FALSE) %>%
filter(protein_id != "P05112") %>%
select(specimen_id, protein_id, protein_expression) %>%
pivot_wider(names_from = protein_id,
values_from = protein_expression) %>%
column_to_rownames("specimen_id") %>%
as.matrix() %>%
impute.knn() %>%
.$data
olink_features <- intersect(colnames(olink_2020), colnames(olink_2021))
olink_2021 <- as.data.frame(olink_2021) %>%
select(olink_features)
olink_2020 <- as.data.frame(olink_2020) %>%
select(olink_features)
olink_medianD0_2021 <- olink_2021 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "protein_id", values_to = "count") %>%
filter(specimen_id %in% unique(metaDf_2021[metaDf_2021$planned_day_relative_to_boost == 0,]$specimen_id)) %>%
group_by(protein_id) %>%
summarise(median = median(count, na.rm = T))
olink_2021_normalized <- olink_2021 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "protein_id",
values_to = "protein_expression") %>%
left_join(olink_medianD0_2021) %>%
mutate(protein_expression_normalized = if_else(is.na(protein_expression) == T,
NA, protein_expression/median))%>%
select(specimen_id, protein_id, protein_expression_normalized) %>%
pivot_wider(names_from = protein_id,
values_from = protein_expression_normalized) %>%
column_to_rownames("specimen_id")
olink_medianD0_2020 <- olink_2020 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "protein_id", values_to = "count") %>%
filter(specimen_id %in% unique(metaDf_2020[metaDf_2020$planned_day_relative_to_boost == 0,]$specimen_id)) %>%
group_by(protein_id) %>%
summarise(median = median(count, na.rm = T))
olink_2020_normalized <- olink_2020 %>%
rownames_to_column("specimen_id") %>%
mutate(specimen_id = as.numeric(specimen_id)) %>%
pivot_longer(!specimen_id, names_to = "protein_id",
values_to = "protein_expression") %>%
left_join(olink_medianD0_2020) %>%
mutate(protein_expression_normalized = if_else(is.na(protein_expression) == T,
NA, protein_expression/median))%>%
select(specimen_id, protein_id, protein_expression_normalized) %>%
pivot_wider(names_from = protein_id,
values_from = protein_expression_normalized) %>%
column_to_rownames("specimen_id")
olink_2020_logTrans <- log2(olink_2020_normalized+1)
olink_2021_logTrans <- log2(olink_2021_normalized+1)
mOlink <- melt(olink_2020)
dist_histogram(mOlink, "value", "Olink Datasets 2020")
# outliers <- boxplot(mOlink$value, plot=FALSE)$out
# dist_histogram(mOlink[mOlink$value <30,] "value",
# "Olink Outlier Removed Datasets 2020")
mOlink <- melt(olink_2020_normalized)
dist_histogram(mOlink, "value", "Olink Normalized Datasets 2020")
mOlink <- melt(olink_2020_logTrans)
dist_histogram(mOlink, "value", "log2 transformed Olink Datasets 2020")
mOlink <- melt(olink_2021)
dist_histogram(mOlink, "value", "Olink Datasets 2021")
dist_histogram(mOlink[mOlink$value <30,], "value",
"Olink Outlier Removed Datasets 2021")
mOlink <- melt(olink_2021_normalized)
dist_histogram(mOlink, "value", "Olink Normalized Datasets 2021")
mOlink <- melt(olink_2021_logTrans)
dist_histogram(mOlink, "value", "log2 transformed Olink Datasets 2021")
mOlink <- melt(olink_2020)
histogram(mOlink, "value", "Olink Datasets 2020")
# outliers <- boxplot(mOlink$value, plot=FALSE)$out
# dist_histogram(mOlink[mOlink$value <30,] "value",
# "Olink Outlier Removed Datasets 2020")
mOlink <- melt(olink_2020_normalized)
histogram(mOlink, "value", "Olink Normalized Datasets 2020")
mOlink <- melt(olink_2020_logTrans)
histogram(mOlink, "value", "log2 transformed Olink Datasets 2020")
mOlink <- melt(olink_2021)
histogram(mOlink, "value", "Olink Datasets 2021")
histogram(mOlink[mOlink$value <30,], "value",
"Olink Outlier Removed Datasets 2021")
mOlink <- melt(olink_2021_normalized)
dist_histogram(mOlink, "value", "Olink Normalized Datasets 2021")
mOlink <- melt(olink_2021_logTrans)
histogram(mOlink, "value", "log2 transformed Olink Datasets 2021")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
olink <- rbind(olink_2020, olink_2021)
aData <- na.omit(olink) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Olink Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Olink Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
olink <- rbind(olink_2020_normalized, olink_2021_normalized)
aData <- na.omit(olink) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Normalized Olink Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Normalized Olink Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
olink <- rbind(olink_2020_logTrans, olink_2021_logTrans)
aData <- na.omit(olink) %>%
t()
metaData <- aMeta[aMeta$specimen_id %in% colnames(aData),c("dataset", "specimen_id", batch.factors)]
rownames(metaData) <- metaData$specimen_id
phenoData = AnnotatedDataFrame(data = metaData)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Log transformed Olink Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Log transformed Olink Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
dataList_2020 <- list()
multiome_2020_backbone <- rna_2020 %>%
rownames_to_column("specimen_id") %>%
select(specimen_id)
abtiter_2020_multi <- multiome_2020_backbone %>%
left_join(abtiter_2020 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
cytof_2020_multi <- multiome_2020_backbone %>%
left_join(cytof_2020 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
olink_2020_multi <- multiome_2020_backbone %>%
left_join(olink_2020 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
metaDf_2020_multi <- multiome_2020_backbone %>%
left_join(metaDf_2020 %>%
mutate_at('specimen_id', as.character)) %>%
rename(sample=specimen_id)
dataList_2020[['original']] <- list(
rnaseq=t(rna_2020),
abtiter=t(abtiter_2020_multi),
cytof=t(cytof_2020_multi),
olink=t(olink_2020_multi))
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
dataList_2020 <- list()
multiome_2020_backbone <- rna_2020 %>%
rownames_to_column("specimen_id") %>%
select(specimen_id)
abtiter_2020_multi <- multiome_2020_backbone %>%
left_join(abtiter_2020 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
cytof_2020_multi <- multiome_2020_backbone %>%
left_join(cytof_2020 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
olink_2020_multi <- multiome_2020_backbone %>%
left_join(olink_2020 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
metaDf_2020_multi <- multiome_2020_backbone %>%
left_join(metaDf_2020 %>%
mutate_at('specimen_id', as.character)) %>%
rename(sample=specimen_id)
dataList_2020[['original']] <- list(
rnaseq=t(rna_2020),
abtiter=t(abtiter_2020_multi),
cytof=t(cytof_2020_multi),
olink=t(olink_2020_multi))
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
dataList_2021 <- list()
multiome_2021_backbone <- rna_2021 %>%
rownames_to_column("specimen_id") %>%
select(specimen_id)
abtiter_2021_multi <- multiome_2021_backbone %>%
left_join(abtiter_2021 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
cytof_2021_multi <- multiome_2021_backbone %>%
left_join(cytof_2021 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
olink_2021_multi <- multiome_2021_backbone %>%
left_join(olink_2021 %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
dataList_2021[['original']] <- list(
rnaseq=t(rna_2021),
abtiter=t(abtiter_2021_multi),
cytof=t(cytof_2021_multi),
olink=t(olink_2021_multi))
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
multiome_2020_backbone <- rna_2020_logTrans %>%
rownames_to_column("specimen_id") %>%
select(specimen_id)
abtiter_2020_multi <- multiome_2020_backbone %>%
left_join(abtiter_2020_logTrans %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
cytof_2020_multi <- multiome_2020_backbone %>%
left_join(cytof_2020_sqrtTrans %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
olink_2020_multi <- multiome_2020_backbone %>%
left_join(olink_2020_logTrans %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
metaDf_2021_multi <- multiome_2021_backbone %>%
left_join(metaDf_2021 %>%
mutate_at('specimen_id', as.character)) %>%
rename(sample=specimen_id)
dataList_2020$transformed <- list(rnaseq=t(rna_2020_logTrans),
abtiter=t(abtiter_2020_multi),
cytof=t(cytof_2020_multi),
olink=t(olink_2020_multi))
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
multiome_2021_backbone <- rna_2021_logTrans %>%
rownames_to_column("specimen_id") %>%
select(specimen_id)
abtiter_2021_multi <- multiome_2021_backbone %>%
left_join(abtiter_2021_logTrans %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
cytof_2021_multi <- multiome_2021_backbone %>%
left_join(cytof_2021_sqrtTrans %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
olink_2021_multi <- multiome_2021_backbone %>%
left_join(olink_2021_logTrans %>% rownames_to_column("specimen_id")) %>%
column_to_rownames("specimen_id")
metaDf_2021_multi <- multiome_2021_backbone %>%
left_join(metaDf_2021 %>%
mutate_at('specimen_id', as.character)) %>%
rename(sample=specimen_id)
dataList_2021$transformed <- list(rnaseq=t(rna_2021_logTrans),
abtiter=t(abtiter_2021_multi),
cytof=t(cytof_2021_multi),
olink=t(olink_2021_multi))
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
factor_number <- 15
transformedData <- lapply(dataList_2020[['transformed']], as.matrix)
MOFAobject_2020 <- create_mofa(transformedData)
plot_data_overview(MOFAobject_2020)
data_opts <- get_default_data_options(MOFAobject_2020)
model_opts <- get_default_model_options(MOFAobject_2020)
model_opts$num_factors <- factor_number
train_opts <- get_default_training_options(MOFAobject_2020)
train_opts$convergence_mode <- "fast"
train_opts$seed <- 42
MOFAobject_2020 <- prepare_mofa(MOFAobject_2020,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
MOFAobject_2020 <- run_mofa(MOFAobject_2020, use_basilisk=TRUE)
plot_factor_cor(MOFAobject_2020)
plot_variance_explained(MOFAobject_2020, max_r2=factor_number)
plot_variance_explained(MOFAobject_2020, plot_total = T)[[2]]
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
factor_number <- 15
transformedData <- lapply(dataList_2021[['transformed']], as.matrix)
MOFAobject_2021 <- create_mofa(transformedData)
plot_data_overview(MOFAobject_2021)
data_opts <- get_default_data_options(MOFAobject_2021)
model_opts <- get_default_model_options(MOFAobject_2021)
model_opts$num_factors <- factor_number
train_opts <- get_default_training_options(MOFAobject_2021)
train_opts$convergence_mode <- "fast"
train_opts$seed <- 42
MOFAobject_2021<- prepare_mofa(MOFAobject_2021,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
MOFAobject_2021 <- run_mofa(MOFAobject_2021, use_basilisk = TRUE)
plot_factor_cor(MOFAobject_2021)
plot_variance_explained(MOFAobject_2021, max_r2=factor_number)
plot_variance_explained(MOFAobject_2021, plot_total = T)[[2]]
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# MOFAobject_mofaImputed <- impute(MOFAobject_2020, views = "all")
# # samples_metadata(MOFAobject_mofaImputed) <- metaDf_2020_multi
# MOFAobject_mofaImputed
#
# results <- MOFAobject_mofaImputed@imputed_data
MOFAobject_mofaImputed <- MOFA2::impute(MOFAobject_2020, views = "all")
samples_metadata(MOFAobject_mofaImputed) <- metaDf_2020_multi
MOFAobject_mofaImputed
## Trained MOFA with the following characteristics:
## Number of views: 4
## Views names: rnaseq abtiter cytof olink
## Number of features (per view): 14608 31 22 29
## Number of groups: 1
## Groups names: group1
## Number of samples (per group): 180
## Number of factors: 15
results <- MOFAobject_mofaImputed@imputed_data
dataList_2020$mofaImputed <- list()
dataList_2020$mofaImputed[["rnaseq"]] <- results$rnaseq[["group1"]]
dataList_2020$mofaImputed[["abtiter"]] <- results$abtiter[["group1"]]
dataList_2020$mofaImputed[["cytof"]] <- results$cytof[["group1"]]
dataList_2020$mofaImputed[["olink"]] <- results$olink[["group1"]]
factor_number <- 15
MOFAobject <- create_mofa(dataList_2020$mofaImputed)
plot_data_overview(MOFAobject)
data_opts <- get_default_data_options(MOFAobject)
model_opts <- get_default_model_options(MOFAobject)
model_opts$num_factors <- factor_number
train_opts <- get_default_training_options(MOFAobject)
train_opts$convergence_mode <- "fast"
train_opts$seed <- 42
MOFAobject <- prepare_mofa(MOFAobject,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
MOFAobject <- run_mofa(MOFAobject, use_basilisk = TRUE)
plot_factor_cor(MOFAobject)
plot_variance_explained(MOFAobject, max_r2=factor_number)
plot_variance_explained(MOFAobject, plot_total = T)[[2]]
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
MOFAobject_mofaImputed <- MOFA2::impute(MOFAobject_2021, views = "all")
samples_metadata(MOFAobject_mofaImputed) <- metaDf_2021_multi
MOFAobject_mofaImputed
## Trained MOFA with the following characteristics:
## Number of views: 4
## Views names: rnaseq abtiter cytof olink
## Number of features (per view): 14608 31 22 29
## Number of groups: 1
## Groups names: group1
## Number of samples (per group): 180
## Number of factors: 15
results <- MOFAobject_mofaImputed@imputed_data
dataList_2021$mofaImputed <- list()
dataList_2021$mofaImputed[["rnaseq"]] <- results$rnaseq[["group1"]]
dataList_2021$mofaImputed[["abtiter"]] <- results$abtiter[["group1"]]
dataList_2021$mofaImputed[["cytof"]] <- results$cytof[["group1"]]
dataList_2021$mofaImputed[["olink"]] <- results$olink[["group1"]]
factor_number <- 15
MOFAobject <- create_mofa(dataList_2020$mofaImputed)
plot_data_overview(MOFAobject)
data_opts <- get_default_data_options(MOFAobject)
model_opts <- get_default_model_options(MOFAobject)
model_opts$num_factors <- factor_number
train_opts <- get_default_training_options(MOFAobject)
train_opts$convergence_mode <- "fast"
train_opts$seed <- 42
MOFAobject <- prepare_mofa(MOFAobject,
data_options = data_opts,
model_options = model_opts,
training_options = train_opts
)
MOFAobject <- run_mofa(MOFAobject, use_basilisk = TRUE)
plot_factor_cor(MOFAobject)
plot_variance_explained(MOFAobject, max_r2=factor_number)
plot_variance_explained(MOFAobject, plot_total = T)[[2]]
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
mData <- melt(dataList_2020$mofaImputed[["abtiter"]])
dist_histogram(mData, "value", "Imputed Ab-titer Datasets 2020")
mData <- melt(dataList_2021$mofaImputed[["abtiter"]])
dist_histogram(mData, "value", "Imputed Ab-titer Datasets 2021")
mData <- melt(dataList_2020$mofaImputed[["cytof"]])
dist_histogram(mData, "value", "Imputed Cell Frequency Datasets 2020")
mData <- melt(dataList_2021$mofaImputed[["cytof"]])
dist_histogram(mData, "value", "Imputed Cell Frequency Datasets 2021")
mData <- melt(dataList_2020$mofaImputed[["olink"]])
dist_histogram(mData, "value", "Imputed Olink Datasets 2020")
mData <- melt(dataList_2021$mofaImputed[["olink"]])
dist_histogram(mData, "value", "Imputed Olink Datasets 2021")
mData <- melt(dataList_2020$mofaImputed[["abtiter"]])
histogram(mData, "value", "Imputed Ab-titer Datasets 2020")
mData <- melt(dataList_2021$mofaImputed[["abtiter"]])
histogram(mData, "value", "Imputed Ab-titer Datasets 2021")
mData <- melt(dataList_2020$mofaImputed[["cytof"]])
histogram(mData, "value", "Imputed Cell Frequency Datasets 2020")
mData <- melt(dataList_2021$mofaImputed[["cytof"]])
histogram(mData, "value", "Imputed Cell Frequency Datasets 2021")
mData <- melt(dataList_2020$mofaImputed[["olink"]])
histogram(mData, "value", "Imputed Olink Datasets 2020")
mData <- melt(dataList_2021$mofaImputed[["olink"]])
histogram(mData, "value", "Imputed Olink Datasets 2021")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
barPlot <- function(df, title){
ggplot(data=df, aes(x=names, y=values, fill=variables)) +
geom_bar(stat="identity", position=position_dodge()) +
# geom_text(aes(label=values), vjust=1.6, color="black",
# position = position_dodge(0.9), size=2) +
theme(axis.text.x = element_text(angle = 45,
vjust = 1,
hjust = 1,
size = 8)) +
ggtitle(title)
}
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
abtiter_2020_vars <- data.frame()
abtiter_2020_vars <- abtiter_2020_logTrans %>%
t() %>%
data.frame() %>%
mutate(original= rowVars(.)) %>%
select("original") %>%
# mutate_all(funs(round(.,0))) %>%
rownames_to_column("names")
abtiter_2020_vars <- dataList_2020$mofaImputed[["abtiter"]] %>%
data.frame() %>%
mutate(imputed= rowVars(.)) %>%
select("imputed") %>%
rownames_to_column("names") %>%
left_join(abtiter_2020_vars) %>%
pivot_longer(!names, names_to = "variables", values_to = "values") %>%
data.frame()
barPlot(abtiter_2020_vars, "Feature variance chaneg by imputation Ab-titer 2020")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
abtiter_2021_vars <- data.frame()
abtiter_2021_vars <- abtiter_2021_logTrans %>%
t() %>%
data.frame() %>%
mutate(original= rowVars(.)) %>%
select("original") %>%
# mutate_all(funs(round(.,0))) %>%
rownames_to_column("names")
abtiter_2021_vars <- dataList_2021$mofaImputed[["abtiter"]] %>%
data.frame() %>%
mutate(imputed= rowVars(.)) %>%
select("imputed") %>%
rownames_to_column("names") %>%
left_join(abtiter_2021_vars) %>%
pivot_longer(!names, names_to = "variables", values_to = "values") %>%
data.frame()
barPlot(abtiter_2021_vars, "Feature variance chaneg by imputation Ab-titer 2021")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cytof_2020_vars <- data.frame()
cytof_2020_vars <- cytof_2020_sqrtTrans %>%
t() %>%
data.frame() %>%
mutate(original= rowVars(.)) %>%
select("original") %>%
# mutate_all(funs(round(.,0))) %>%
rownames_to_column("names")
cytof_2020_vars <- dataList_2020$mofaImputed[["cytof"]] %>%
data.frame() %>%
mutate(imputed= rowVars(.)) %>%
select("imputed") %>%
rownames_to_column("names") %>%
left_join(cytof_2020_vars) %>%
pivot_longer(!names, names_to = "variables", values_to = "values") %>%
data.frame()
barPlot(cytof_2020_vars, "Feature variance chaneg by imputation Cell Frequency 2020")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cytof_2021_vars <- data.frame()
cytof_2021_vars <- cytof_2021_sqrtTrans %>%
t() %>%
data.frame() %>%
mutate(original= rowVars(.)) %>%
select("original") %>%
# mutate_all(funs(round(.,0))) %>%
rownames_to_column("names")
cytof_2021_vars <- dataList_2021$mofaImputed[["cytof"]] %>%
data.frame() %>%
mutate(imputed= rowVars(.)) %>%
select("imputed") %>%
rownames_to_column("names") %>%
left_join(cytof_2021_vars) %>%
pivot_longer(!names, names_to = "variables", values_to = "values") %>%
data.frame()
barPlot(cytof_2021_vars, "Feature variance chaneg by imputation Cell Frequency 2021")
#### olink 2020
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
olink_2020_vars <- data.frame()
olink_2020_vars <- olink_2020_logTrans %>%
t() %>%
data.frame() %>%
mutate(original= rowVars(.)) %>%
select("original") %>%
# mutate_all(funs(round(.,0))) %>%
rownames_to_column("names")
olink_2020_vars <- dataList_2020$mofaImputed[["olink"]] %>%
data.frame() %>%
mutate(imputed= rowVars(.)) %>%
select("imputed") %>%
rownames_to_column("names") %>%
left_join(olink_2020_vars) %>%
pivot_longer(!names, names_to = "variables", values_to = "values") %>%
data.frame()
barPlot(olink_2020_vars, "Feature variance chaneg by imputation Olink 2020")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
olink_2021_vars <- data.frame()
olink_2021_vars <- olink_2021_logTrans %>%
t() %>%
data.frame() %>%
mutate(original= rowVars(.)) %>%
select("original") %>%
# mutate_all(funs(round(.,0))) %>%
rownames_to_column("names")
olink_2021_vars <- dataList_2021$mofaImputed[["olink"]] %>%
data.frame() %>%
mutate(imputed= rowVars(.)) %>%
select("imputed") %>%
rownames_to_column("names") %>%
left_join(olink_2021_vars) %>%
pivot_longer(!names, names_to = "variables", values_to = "values") %>%
data.frame()
barPlot(olink_2021_vars, "Feature variance chaneg by imputation Olink 2021")
#### variance of all values
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
ab2020_org <- abtiter_2020_logTrans %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
ab2021_org <- abtiter_2021_logTrans %>%
# data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
ab2020_imp <- dataList_2020$mofaImputed[["abtiter"]] %>%
data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
ab2021_imp <- dataList_2021$mofaImputed[["abtiter"]] %>%
data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
cytof2020_org <- cytof_2020_sqrtTrans %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
cytof2021_org <- cytof_2021_sqrtTrans %>%
# data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
cytof2020_imp <- dataList_2020$mofaImputed[["cytof"]] %>%
data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
cytof2021_imp <- dataList_2021$mofaImputed[["cytof"]] %>%
data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
olink2020_org <- olink_2020_logTrans %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
olink2021_org <- olink_2021_logTrans %>%
# data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
olink2020_imp <- dataList_2020$mofaImputed[["olink"]] %>%
data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
olink2021_imp <- dataList_2021$mofaImputed[["olink"]] %>%
data.frame() %>%
pivot_longer(cols = everything(),
names_to = 'samples',
values_to = "values") %>%
summarise(var(values, na.rm = TRUE)) %>%
as.vector()
varDf <- data.frame(experiment=c(rep("abtiter", 4), rep("cytof", 4), rep("olink", 4)),
variables=c(rep(c(rep('original', 2), rep('imputed', 2)), 3)),
dataset=c(rep(c("2020", "2021"), 6)),
values=c(ab2020_org[[1]], ab2021_org[[1]], ab2020_imp[[1]], ab2021_imp[[1]],
cytof2020_org[[1]], cytof2021_org[[1]], cytof2020_imp[[1]], cytof2021_imp[[1]],
olink2020_org[[1]], olink2021_org[[1]], olink2020_imp[[1]], olink2021_imp[[1]]))
barPlot <- function(df, title){
ggplot(data=df, aes(x=names, y=values, fill=variables)) +
geom_bar(stat="identity", position=position_dodge()) +
geom_text(aes(label=values), vjust=0, color="black",
position = position_dodge(0.9), size=4) +
theme(axis.text.x = element_text(angle = 45,
vjust = 1,
hjust = 1,
size = 8)) +
ggtitle(title)}
varDf <- varDf %>%
mutate(across(c('values'),round, 2)) %>%
unite(names, c("experiment", "dataset"))
barPlot(varDf, "Experiments variance chaneg by imputation")
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
bcMeta <- rbind(metaDf_2020_multi, metaDf_2021_multi) %>%
column_to_rownames("sample")
batchCorrected_list <- list()
rna <- cbind(dataList_2020$mofaImputed[["rnaseq"]], dataList_2021$mofaImputed[["rnaseq"]])
batch <- c(rep('2020', dim(multiome_2020_backbone)[1]), rep('2021', dim(multiome_2021_backbone)[1]))
aData <- ComBat(rna, batch = batch)
batchCorrected_list[["rnaseq"]] <- aData
phenoData = AnnotatedDataFrame(data = bcMeta)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run_rnaseq(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Batch Corrected RNA Seq Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Batch Corrected RNA Seq Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
abtiter <- cbind(dataList_2020$mofaImputed[["abtiter"]], dataList_2021$mofaImputed[["abtiter"]])
batch <- c(rep('2020', dim(multiome_2020_backbone)[1]), rep('2021', dim(multiome_2021_backbone)[1]))
aData <- ComBat(abtiter, batch = batch)
batchCorrected_list[["abtiter"]] <- aData
phenoData = AnnotatedDataFrame(data = bcMeta)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Batch Corrected Ab-titer Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Batch Corrected Ab-titer Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
cytof <- cbind(dataList_2020$mofaImputed[["cytof"]], dataList_2021$mofaImputed[["cytof"]])
batch <- c(rep('2020', dim(multiome_2020_backbone)[1]), rep('2021', dim(multiome_2021_backbone)[1]))
aData <- ComBat(cytof, batch = batch)
batchCorrected_list[["cytof"]] <- aData
phenoData = AnnotatedDataFrame(data = bcMeta)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Batch Corrected Cell Frequency Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Batch Corrected Cell Frequency Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)
olink <- cbind(dataList_2020$mofaImputed[["olink"]], dataList_2021$mofaImputed[["olink"]])
batch <- c(rep('2020', dim(multiome_2020_backbone)[1]), rep('2021', dim(multiome_2021_backbone)[1]))
aData <- ComBat(olink, batch = batch)
batchCorrected_list[["olink"]] <- aData
phenoData = AnnotatedDataFrame(data = bcMeta)
exprset = ExpressionSet(as.matrix(aData[, sampleNames(phenoData)]),
phenoData = phenoData)
exprs(exprset)
pvcaObj_after <- pvca_run(exprset, batch.factors = c("dataset", batch.factors), threshold =0.6)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
plot_pvca(pvcaObj_after, "Batch Corrected Olink Datasets 2020 and 2021")
pca_plots = plot_pca(aData, metaData, "Batch Corrected Olink Datasets 2020 and 2021")
plot(pca_plots$PC12)
plot(pca_plots$PC13)
plot(pca_plots$PC23)